libmatchtigs/implementation/
mod.rs

1// Some of the if branches might be more readable if not collapsed
2#![allow(clippy::collapsible_if)]
3// This lint never produces correct suggestions in our case.
4#![allow(clippy::mutex_atomic)]
5
6use genome_graph::bigraph::algo::eulerian::{
7    compute_eulerian_superfluous_out_biedges, find_non_eulerian_binodes_with_differences,
8};
9use genome_graph::bigraph::interface::dynamic_bigraph::DynamicEdgeCentricBigraph;
10use genome_graph::bigraph::interface::static_bigraph::StaticBigraph;
11use genome_graph::bigraph::interface::static_bigraph::StaticEdgeCentricBigraph;
12use genome_graph::bigraph::interface::BidirectedData;
13use genome_graph::bigraph::traitgraph::index::{GraphIndex, OptionalGraphIndex};
14use genome_graph::bigraph::traitgraph::interface::{GraphBase, ImmutableGraphContainer};
15use genome_graph::bigraph::traitgraph::traitsequence::interface::Sequence;
16use genome_graph::bigraph::traitgraph::walks::{EdgeWalk, VecEdgeWalk};
17use itertools::Itertools;
18use log::{info, warn};
19use simplelog::{ColorChoice, CombinedLogger, Config, LevelFilter, TermLogger, TerminalMode};
20use std::collections::BTreeMap;
21use std::ffi::{OsStr, OsString};
22use std::fs::File;
23use std::io::BufWriter;
24use std::ops::Range;
25use std::path::{Path, PathBuf};
26use std::str::FromStr;
27use std::sync::atomic::{AtomicBool, Ordering};
28use traitgraph_algo::dijkstra::{DijkstraTargetMap, DijkstraWeightedEdgeData};
29
30pub mod eulertigs;
31pub mod greedytigs;
32pub mod matchtigs;
33pub mod pathtigs;
34
35const TARGET_DIJKSTRA_BLOCK_TIME: f32 = 5.0; // seconds
36
37pub fn initialise_logging(log_level: LevelFilter) {
38    CombinedLogger::init(vec![TermLogger::new(
39        log_level,
40        Config::default(),
41        TerminalMode::Mixed,
42        ColorChoice::Auto,
43    )])
44    .unwrap();
45
46    info!("Logging initialised successfully");
47}
48
49/// An algorithm to compute tigs for a graph.
50pub trait TigAlgorithm<Graph: GraphBase>: Default {
51    /// The configuration of the algorithm.
52    type Configuration;
53
54    /// Compute the tigs given a graph and configuration.
55    fn compute_tigs(
56        graph: &mut Graph,
57        configuration: &Self::Configuration,
58    ) -> Vec<VecEdgeWalk<Graph>>;
59}
60
61/// The type of the data structure to store the weight of visited nodes in Dijkstra's algorithm.
62#[derive(Eq, PartialEq, Debug, Clone, Copy)]
63pub enum NodeWeightArrayType {
64    /// Use the [EpochNodeWeightArray].
65    EpochNodeWeightArray,
66
67    /// Use the [hashbrown::HashMap] to store node weights.
68    HashbrownHashMap,
69}
70
71impl FromStr for NodeWeightArrayType {
72    type Err = String;
73
74    fn from_str(s: &str) -> Result<Self, Self::Err> {
75        Ok(match s {
76            "EpochNodeWeightArray" => Self::EpochNodeWeightArray,
77            "HashbrownHashMap" => Self::HashbrownHashMap,
78            other => {
79                return Err(format!("Unknown node weight array type: {other}"));
80            }
81        })
82    }
83}
84
85/// The heap data structure used by Dijkstra's algorithm.
86#[derive(Eq, PartialEq, Debug, Clone, Copy)]
87pub enum HeapType {
88    /// Use the [BinaryHeap](std::collections::BinaryHeap).
89    StdBinaryHeap,
90}
91
92impl FromStr for HeapType {
93    type Err = String;
94
95    fn from_str(s: &str) -> Result<Self, Self::Err> {
96        Ok(match s {
97            "StdBinaryHeap" => Self::StdBinaryHeap,
98            other => {
99                return Err(format!("Unknown heap type: {other}"));
100            }
101        })
102    }
103}
104
105/// The performance data collector used by Dijkstra's algorithm.
106#[derive(Eq, PartialEq, Debug, Clone, Copy)]
107pub enum PerformanceDataType {
108    /// Collect no performance data.
109    None,
110    /// Collect all possible performance data.
111    Complete,
112}
113
114impl FromStr for PerformanceDataType {
115    type Err = String;
116
117    fn from_str(s: &str) -> Result<Self, Self::Err> {
118        Ok(match s {
119            "None" => Self::None,
120            "Complete" => Self::Complete,
121            other => {
122                return Err(format!("Unknown performance data type: {other}"));
123            }
124        })
125    }
126}
127
128pub struct RelaxedAtomicBoolVec {
129    map: Vec<AtomicBool>,
130}
131
132impl RelaxedAtomicBoolVec {
133    pub fn new(len: usize) -> Self {
134        Self {
135            map: std::iter::repeat(false).map(Into::into).take(len).collect(),
136        }
137    }
138
139    pub fn set(&self, index: usize, value: bool) {
140        self.map[index].store(value, Ordering::Relaxed);
141    }
142
143    pub fn get(&self, index: usize) -> bool {
144        self.map[index].load(Ordering::Relaxed)
145    }
146
147    /*pub fn swap(&self, index: usize, value: bool) -> bool {
148        self.map[index].swap(value, Ordering::Relaxed)
149    }*/
150
151    pub fn reinitialise(&mut self, len: usize) {
152        self.map.clear();
153        self.map
154            .extend(std::iter::repeat(false).map(Into::into).take(len));
155    }
156
157    pub fn slice(&self, range: Range<usize>) -> RelaxedAtomicBoolSlice {
158        RelaxedAtomicBoolSlice {
159            map: &self.map[range],
160        }
161    }
162
163    pub fn iter(&self) -> impl '_ + Iterator<Item = bool> {
164        self.map.iter().map(|b| b.load(Ordering::Relaxed))
165    }
166}
167
168pub struct RelaxedAtomicBoolSlice<'a> {
169    map: &'a [AtomicBool],
170}
171
172impl RelaxedAtomicBoolSlice<'_> {
173    pub fn set(&self, index: usize, value: bool) {
174        self.map[index].store(value, Ordering::Relaxed);
175    }
176
177    /*pub fn get(&self, index: usize) -> bool {
178        self.map[index].load(Ordering::Relaxed)
179    }*/
180}
181
182impl<Graph: GraphBase> DijkstraTargetMap<Graph> for RelaxedAtomicBoolVec {
183    fn is_target(&self, node_index: Graph::NodeIndex) -> bool {
184        self.get(node_index.as_usize())
185    }
186}
187
188pub struct GraphMatchingNodeMap {
189    node_id_map: Vec<Vec<usize>>,
190    current_node_id: usize,
191}
192
193impl GraphMatchingNodeMap {
194    pub fn new<Graph: StaticBigraph>(graph: &Graph) -> Self {
195        Self {
196            node_id_map: vec![Vec::new(); graph.node_count()],
197            current_node_id: 0,
198        }
199    }
200
201    pub fn get_or_create_node_indexes<
202        EdgeData: BidirectedData + Eq,
203        Graph: StaticEdgeCentricBigraph<EdgeData = EdgeData>,
204    >(
205        &mut self,
206        graph: &Graph,
207        node_index: Graph::NodeIndex,
208    ) -> &[usize] {
209        let result = &mut self.node_id_map[node_index.as_usize()];
210        if result.is_empty() {
211            for _ in 0..compute_eulerian_superfluous_out_biedges(graph, node_index).abs() {
212                result.push(self.current_node_id);
213                self.current_node_id += 1;
214            }
215
216            self.node_id_map[graph.mirror_node(node_index).unwrap().as_usize()] = result.clone();
217        }
218
219        // Retrieve the value again for lifetime reasons.
220        let result = &self.node_id_map[node_index.as_usize()];
221        debug_assert!(!result.is_empty());
222        result
223    }
224
225    pub fn get_node_indexes<
226        OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
227        NodeIndex: GraphIndex<OptionalNodeIndex>,
228    >(
229        &self,
230        node_index: NodeIndex,
231    ) -> &[usize] {
232        let result = &self.node_id_map[node_index.as_usize()];
233        debug_assert!(!result.is_empty());
234        result
235    }
236
237    pub fn get_node_indexes_unchecked<
238        OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
239        NodeIndex: GraphIndex<OptionalNodeIndex>,
240    >(
241        &self,
242        node_index: NodeIndex,
243    ) -> &[usize] {
244        &self.node_id_map[node_index.as_usize()]
245    }
246
247    pub fn node_count(&self) -> usize {
248        self.current_node_id
249    }
250}
251
252pub fn choose_in_node_from_iterator<
253    EdgeData: BidirectedData + Eq,
254    Graph: StaticEdgeCentricBigraph<EdgeData = EdgeData>, //, EdgeData = BidirectedGfaNodeData<EdgeData>>,
255    InNodeIterator: Iterator<Item = Graph::NodeIndex>,
256>(
257    graph: &Graph,
258    mut in_node_iterator: InNodeIterator,
259    out_node_difference: isize,
260    out_node: Graph::NodeIndex,
261) -> Option<Graph::NodeIndex> {
262    let mut in_node = Some(in_node_iterator.next().unwrap());
263    if (in_node == Some(graph.mirror_node(out_node).unwrap()) && out_node_difference > -2)
264        || in_node == Some(out_node)
265    {
266        // If the in_node is the mirror of the out_node, adding an edge would create a self loop that does not resolve anything regarding Eulerianess.
267        // And if the in_node is the out_node, then we have a self-mirror, and again adding a self loop does not resolve anything regarding Eulerianess.
268        if in_node == Some(graph.mirror_node(out_node).unwrap()) && out_node_difference > -2 {
269            warn!("Adding expensive edges in a different order because chosen nodes {} and {} are mirrors of each other", in_node.unwrap().as_usize(), out_node.as_usize());
270        } else if in_node == Some(out_node) {
271            warn!("Adding expensive edges in a different order because chosen nodes are a self-mirror");
272        }
273        in_node = if let Some(in_node) = in_node_iterator.next() {
274            Some(in_node)
275        } else {
276            debug_assert_ne!(compute_eulerian_superfluous_out_biedges(graph, out_node), 0);
277            debug_assert_ne!(
278                compute_eulerian_superfluous_out_biedges(graph, in_node.unwrap()),
279                0
280            );
281            None
282        };
283    }
284    in_node
285}
286
287/// The edge data of the graph to compute matchtigs on has to implement this.
288pub trait MatchtigEdgeData<SequenceHandle>:
289    DijkstraWeightedEdgeData<usize> + BidirectedData
290{
291    /// Returns true if the edge is a dummy edge.
292    /// This is the case if the `dummy_id` given in [Self::new] is non-zero.
293    fn is_dummy(&self) -> bool;
294
295    /// Return true if the edge is an original edge.
296    fn is_original(&self) -> bool {
297        !self.is_dummy()
298    }
299
300    /// Returns true if this is the forwards variant of this edge,
301    /// i.e. if the `sequence_handle` given in [Self::new] is the sequence belonging to this edge.
302    /// This is the case if the `forwards` given in [Self::new] is `true`.
303    fn is_forwards(&self) -> bool;
304
305    /// Returns true if this is the backwards variant of this edge,
306    /// i.e. if the `sequence_handle` given in [Self::new] is the reverse complement of the sequence belonging to this edge.
307    /// This is the case if the `forwards` given in [Self::new] is `false`.
308    fn is_backwards(&self) -> bool {
309        !self.is_forwards()
310    }
311
312    /// Creates a new edge with the given properties.
313    /// The given values must be returned by the respective functions of this trait,
314    /// and the `weight` by the implementation of [DijkstraWeightedEdgeData](traitgraph_algo::dijkstra::DijkstraWeightedEdgeData) of this type.
315    fn new(sequence_handle: SequenceHandle, forwards: bool, weight: usize, dummy_id: usize)
316        -> Self;
317}
318
319pub fn debug_assert_graph_has_no_consecutive_dummy_edges<
320    SequenceHandle,
321    EdgeData: MatchtigEdgeData<SequenceHandle> + Eq,
322    Graph: StaticEdgeCentricBigraph<EdgeData = EdgeData>,
323>(
324    graph: &Graph,
325    k: usize,
326) {
327    if !cfg!(debug_assertions) {
328        return;
329    }
330
331    let mut dummy_in_edges = Vec::new();
332    let mut dummy_cheap_in_edges = Vec::new();
333    let mut dummy_expensive_in_edges = Vec::new();
334    let mut dummy_out_edges = Vec::new();
335    let mut dummy_cheap_out_edges = Vec::new();
336    let mut dummy_expensive_out_edges = Vec::new();
337    for node_index in graph.node_indices() {
338        dummy_in_edges.clear();
339        dummy_cheap_in_edges.clear();
340        dummy_expensive_in_edges.clear();
341        for in_neighbor in graph.in_neighbors(node_index) {
342            let edge_data = graph.edge_data(in_neighbor.edge_id);
343            if edge_data.is_dummy() {
344                dummy_in_edges.push(in_neighbor.edge_id);
345                if edge_data.weight() >= k {
346                    dummy_expensive_in_edges.push(in_neighbor.edge_id);
347                } else {
348                    dummy_cheap_in_edges.push((in_neighbor.edge_id, edge_data.weight()));
349                }
350            }
351        }
352
353        if !dummy_in_edges.is_empty() {
354            dummy_out_edges.clear();
355            dummy_cheap_out_edges.clear();
356            dummy_expensive_out_edges.clear();
357            for out_neighbor in graph.out_neighbors(node_index) {
358                let edge_data = graph.edge_data(out_neighbor.edge_id);
359                if edge_data.is_dummy() {
360                    dummy_out_edges.push(out_neighbor.edge_id);
361                    if edge_data.weight() >= k {
362                        dummy_expensive_out_edges.push(out_neighbor.edge_id);
363                    } else {
364                        dummy_cheap_out_edges.push((out_neighbor.edge_id, edge_data.weight()));
365                    }
366                }
367            }
368
369            if !dummy_out_edges.is_empty() {
370                if dummy_in_edges.len() == 1 && dummy_out_edges.len() == 1 {
371                    if graph
372                        .mirror_edge_edge_centric(*dummy_in_edges.first().unwrap())
373                        .expect("Edge has no mirror")
374                        == *dummy_out_edges.first().unwrap()
375                    {
376                        debug_assert_ne!(
377                            graph.edge_data(*dummy_in_edges.first().unwrap()).weight(),
378                            0
379                        );
380                        continue;
381                    }
382                }
383
384                panic!("Found node with both an incoming and an outgoing dummy edge. This node is {}a self-mirror. Dummy in-edges: cheap: {:?} expensive: {:?}, dummy out-edges: cheap: {:?} expensive: {:?}",
385                           if graph.is_self_mirror_node(node_index) { "" } else { "NOT " },
386                           dummy_cheap_in_edges, dummy_expensive_in_edges, dummy_cheap_out_edges, dummy_expensive_out_edges);
387            }
388        }
389    }
390}
391
392pub fn make_graph_eulerian_with_breaking_edges<
393    NodeIndex: GraphIndex<OptionalNodeIndex>,
394    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
395    SequenceHandle: Default + Clone,
396    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
397    Graph: DynamicEdgeCentricBigraph<
398        NodeIndex = NodeIndex,
399        OptionalNodeIndex = OptionalNodeIndex,
400        EdgeData = EdgeData,
401    >,
402>(
403    graph: &mut Graph,
404    dummy_sequence: SequenceHandle,
405    dummy_edge_id: &mut usize,
406    k: usize,
407) {
408    let nodes_and_differences = find_non_eulerian_binodes_with_differences(graph);
409    let mut out_node_differences = BTreeMap::new();
410    let mut in_node_differences = BTreeMap::new();
411    out_node_differences.extend(
412        nodes_and_differences
413            .iter()
414            .filter(|(_, difference)| *difference < 0)
415            .copied()
416            .map(|(e, d)| (std::cmp::Reverse(e), d)),
417    );
418    in_node_differences.extend(
419        nodes_and_differences
420            .iter()
421            .filter(|(_, difference)| *difference > 0)
422            .copied(),
423    );
424    let self_mirror_node_differences: Vec<_> = nodes_and_differences
425        .iter()
426        .filter_map(|&(node, difference)| if difference == 0 { Some(node) } else { None })
427        .collect();
428    info!(
429        "Adding edges for {} unmatched in_nodes, {} unmatched out_nodes and {} unmatched self_mirror_nodes",
430        in_node_differences.len(),
431        out_node_differences.len(),
432        self_mirror_node_differences.len(),
433    );
434
435    let total_out_node_difference: isize = out_node_differences.values().copied().sum();
436    let total_in_node_difference: isize = in_node_differences.values().copied().sum();
437    let out_node_difference_one_count = out_node_differences.values().filter(|d| **d == -1).count();
438    let in_node_difference_one_count = in_node_differences.values().filter(|d| **d == 1).count();
439    let out_node_difference_two_count = out_node_differences.values().filter(|d| **d == -2).count();
440    let in_node_difference_two_count = in_node_differences.values().filter(|d| **d == 2).count();
441    let out_node_difference_three_count =
442        out_node_differences.values().filter(|d| **d == -3).count();
443    let in_node_difference_three_count = in_node_differences.values().filter(|d| **d == 3).count();
444    let out_node_difference_four_count =
445        out_node_differences.values().filter(|d| **d == -4).count();
446    let in_node_difference_four_count = in_node_differences.values().filter(|d| **d == 4).count();
447    let out_node_difference_more_count = out_node_differences.values().filter(|d| **d < -4).count();
448    let in_node_difference_more_count = in_node_differences.values().filter(|d| **d > 4).count();
449    debug_assert_eq!(-total_out_node_difference, total_in_node_difference);
450    debug_assert_eq!(
451        (total_in_node_difference + self_mirror_node_differences.len() as isize) % 2,
452        0
453    );
454    info!(
455        "{} edges need to be added in total",
456        (total_in_node_difference + self_mirror_node_differences.len() as isize) / 2
457    );
458    debug_assert_eq!(out_node_difference_one_count, in_node_difference_one_count);
459    debug_assert_eq!(out_node_difference_two_count, in_node_difference_two_count);
460    debug_assert_eq!(
461        out_node_difference_three_count,
462        in_node_difference_three_count
463    );
464    debug_assert_eq!(
465        out_node_difference_four_count,
466        in_node_difference_four_count
467    );
468    debug_assert_eq!(
469        out_node_difference_more_count,
470        in_node_difference_more_count
471    );
472    debug_assert_eq!(out_node_difference_more_count, 0);
473    info!(
474        "{}/{}/{}/{} binodes that are not self-mirrors have difference 1/2/3/4",
475        out_node_difference_one_count,
476        out_node_difference_two_count,
477        out_node_difference_three_count,
478        out_node_difference_four_count
479    );
480
481    for pair in &self_mirror_node_differences.into_iter().chunks(2) {
482        let pair: Vec<_> = pair.collect();
483        if pair.len() == 2 {
484            let out_node = pair[0];
485            let in_node = pair[1];
486            let mirror_out_node = graph.mirror_node(in_node).unwrap();
487            let mirror_in_node = graph.mirror_node(out_node).unwrap();
488
489            let sequence = dummy_sequence.clone();
490            *dummy_edge_id += 1;
491            let edge_data = EdgeData::new(sequence, true, k, *dummy_edge_id);
492            graph.add_edge(out_node, in_node, edge_data.clone());
493            graph.add_edge(mirror_out_node, mirror_in_node, edge_data.mirror());
494        } else {
495            debug_assert_eq!(pair.len(), 1);
496            let (&in_node, difference) = in_node_differences.iter_mut().next().expect(
497                "Have an uneven number of self-mirrors, but no other nodes with missing in edges.",
498            );
499            debug_assert_ne!(in_node, graph.mirror_node(in_node).unwrap());
500            debug_assert!(*difference > 0);
501
502            let out_node = pair[0];
503            let mirror_out_node = graph.mirror_node(in_node).unwrap();
504            let mirror_in_node = graph.mirror_node(out_node).unwrap();
505
506            let sequence = dummy_sequence.clone();
507            *dummy_edge_id += 1;
508            let edge_data = EdgeData::new(sequence, true, k, *dummy_edge_id);
509            graph.add_edge(out_node, in_node, edge_data.clone());
510            graph.add_edge(mirror_out_node, mirror_in_node, edge_data.mirror());
511
512            *difference -= 1;
513            if *difference == 0 {
514                in_node_differences.remove(&in_node);
515                out_node_differences
516                    .remove(&std::cmp::Reverse(graph.mirror_node(in_node).unwrap()))
517                    .expect("Mirror of in_node not found");
518            } else {
519                *out_node_differences
520                    .get_mut(&std::cmp::Reverse(graph.mirror_node(in_node).unwrap()))
521                    .expect("Mirror of in_node not found") += 1;
522            }
523        }
524    }
525
526    while let Some(out_node) = out_node_differences.keys().copied().next() {
527        /*let current_out_node_difference: isize = out_node_differences.iter().map(|(_, d)| *d).sum();
528        let current_in_node_difference: isize = in_node_differences.iter().map(|(_, d)| *d).sum();
529        debug_assert_eq!(-current_out_node_difference, current_in_node_difference);*/
530
531        let out_node = out_node.0;
532        let out_node_difference = out_node_differences
533            .get_mut(&std::cmp::Reverse(out_node))
534            .unwrap();
535        let in_node = if let Some(in_node) = choose_in_node_from_iterator(
536            graph,
537            in_node_differences.keys().copied(),
538            *out_node_difference,
539            out_node,
540        ) {
541            in_node
542        } else {
543            let current_out_node_difference: isize = out_node_differences.values().copied().sum();
544            let current_in_node_difference: isize = in_node_differences.values().copied().sum();
545            debug_assert_eq!(-current_out_node_difference, current_in_node_difference);
546            debug_assert_eq!(
547                (
548                    out_node_differences.values().copied().sum::<isize>(),
549                    in_node_differences.values().copied().sum::<isize>()
550                ),
551                (-1, 1)
552            );
553            panic!("No further in_nodes left");
554        };
555
556        /*info!(
557            "in_node {} and out_node {}",
558            in_node.as_usize(),
559            out_node.as_usize()
560        );*/
561        let is_mirror = in_node == graph.mirror_node(out_node).unwrap();
562        debug_assert_ne!(
563            in_node, out_node,
564            "This part of the algorithm was not designed for self mirrors"
565        );
566        debug_assert!(compute_eulerian_superfluous_out_biedges(graph, out_node) < 0);
567        debug_assert!(compute_eulerian_superfluous_out_biedges(graph, in_node) > 0);
568
569        let mirror_out_node = graph.mirror_node(in_node).unwrap();
570        let mirror_in_node = graph.mirror_node(out_node).unwrap();
571
572        let sequence = dummy_sequence.clone();
573        *dummy_edge_id += 1;
574        let edge_data = EdgeData::new(sequence, true, k, *dummy_edge_id);
575
576        graph.add_edge(out_node, in_node, edge_data.clone());
577        graph.add_edge(mirror_out_node, mirror_in_node, edge_data.mirror());
578
579        let in_node_difference = in_node_differences.get_mut(&in_node).unwrap();
580        debug_assert_ne!(*out_node_difference, 0);
581        debug_assert_ne!(*in_node_difference, 0);
582        *out_node_difference += 1;
583        *in_node_difference -= 1;
584
585        if is_mirror {
586            debug_assert_ne!(*out_node_difference, 0);
587            debug_assert_ne!(*in_node_difference, 0);
588        }
589
590        let remove_out_node = *out_node_difference == 0;
591        let remove_in_node = *in_node_difference == 0;
592        if remove_out_node {
593            debug_assert_eq!(compute_eulerian_superfluous_out_biedges(graph, out_node), 0);
594            out_node_differences.remove(&std::cmp::Reverse(out_node));
595        } else {
596            debug_assert!(
597                compute_eulerian_superfluous_out_biedges(graph, out_node) != 0 || is_mirror
598            );
599        }
600        if remove_in_node {
601            debug_assert_eq!(compute_eulerian_superfluous_out_biedges(graph, in_node), 0);
602            in_node_differences.remove(&in_node);
603        } else {
604            debug_assert!(
605                compute_eulerian_superfluous_out_biedges(graph, in_node) != 0 || is_mirror
606            );
607        }
608
609        if let Some(mirror_out_node_difference) =
610            out_node_differences.get_mut(&std::cmp::Reverse(mirror_out_node))
611        {
612            *mirror_out_node_difference += 1;
613            if *mirror_out_node_difference == 0 {
614                debug_assert_eq!(
615                    compute_eulerian_superfluous_out_biedges(graph, mirror_out_node),
616                    0
617                );
618                debug_assert!(remove_in_node || is_mirror);
619                out_node_differences.remove(&std::cmp::Reverse(mirror_out_node));
620            } else {
621                debug_assert_ne!(
622                    compute_eulerian_superfluous_out_biedges(graph, mirror_out_node),
623                    0
624                );
625                debug_assert!(!remove_in_node);
626            }
627        }
628        if let Some(mirror_in_node_difference) = in_node_differences.get_mut(&mirror_in_node) {
629            *mirror_in_node_difference -= 1;
630            if *mirror_in_node_difference == 0 {
631                debug_assert_eq!(
632                    compute_eulerian_superfluous_out_biedges(graph, mirror_in_node),
633                    0
634                );
635                debug_assert!(remove_out_node || is_mirror);
636                in_node_differences.remove(&mirror_in_node);
637            } else {
638                debug_assert_ne!(
639                    compute_eulerian_superfluous_out_biedges(graph, mirror_in_node),
640                    0
641                );
642                debug_assert!(!remove_out_node);
643            }
644        }
645    }
646
647    debug_assert!(out_node_differences.is_empty());
648    debug_assert!(in_node_differences.is_empty());
649}
650
651/// Convenience method for [write_duplication_bitvector].
652pub fn write_duplication_bitvector_to_file<
653    'ws,
654    SequenceHandle,
655    EdgeData: MatchtigEdgeData<SequenceHandle>,
656    Graph: ImmutableGraphContainer<EdgeData = EdgeData>,
657    Walk: 'ws + EdgeWalk<Graph, Subwalk>,
658    Subwalk: EdgeWalk<Graph, Subwalk> + ?Sized,
659    WalkSource: 'ws + IntoIterator<Item = &'ws Walk>,
660>(
661    graph: &Graph,
662    walks: WalkSource,
663    path: impl AsRef<Path>,
664) -> Result<(), std::io::Error> {
665    write_duplication_bitvector(graph, walks, &mut BufWriter::new(File::create(path)?))
666}
667
668/// Write a bitvector in ASCII format for each walk.
669/// The bitvector contains a 1 for each original kmer, and a 0 for each duplicate kmer.
670/// The bitvectors of different walks are separated by newlines.
671pub fn write_duplication_bitvector<
672    'ws,
673    SequenceHandle,
674    EdgeData: MatchtigEdgeData<SequenceHandle>,
675    Graph: ImmutableGraphContainer<EdgeData = EdgeData>,
676    Walk: 'ws + EdgeWalk<Graph, Subwalk>,
677    Subwalk: EdgeWalk<Graph, Subwalk> + ?Sized,
678    WalkSource: 'ws + IntoIterator<Item = &'ws Walk>,
679>(
680    graph: &Graph,
681    walks: WalkSource,
682    writer: &mut impl std::io::Write,
683) -> Result<(), std::io::Error> {
684    for walk in walks.into_iter() {
685        if walk.is_empty() {
686            panic!("Found empty walk when writing duplication bitvector");
687        }
688
689        for edge in walk.iter() {
690            let edge_data = graph.edge_data(*edge);
691            let character = if edge_data.is_original() { '1' } else { '0' };
692
693            for _ in 0..edge_data.weight() {
694                write!(writer, "{character}")?;
695            }
696        }
697
698        writeln!(writer)?;
699    }
700
701    Ok(())
702}
703
704fn append_to_filename(path: PathBuf, ext: impl AsRef<OsStr>) -> PathBuf {
705    let mut os_string: OsString = path.into();
706    os_string.push(ext.as_ref());
707    os_string.into()
708}
709
710#[cfg(test)]
711mod tests {
712    use crate::implementation::{make_graph_eulerian_with_breaking_edges, MatchtigEdgeData};
713    use genome_graph::bigraph::implementation::node_bigraph_wrapper::NodeBigraphWrapper;
714    use genome_graph::bigraph::interface::dynamic_bigraph::DynamicBigraph;
715    use genome_graph::bigraph::interface::static_bigraph::StaticBigraphFromDigraph;
716    use genome_graph::bigraph::interface::BidirectedData;
717    use genome_graph::bigraph::traitgraph::implementation::petgraph_impl::PetGraph;
718    use genome_graph::bigraph::traitgraph::interface::MutableGraphContainer;
719    use traitgraph_algo::dijkstra::DijkstraWeightedEdgeData;
720
721    #[derive(Debug, Clone, Eq, PartialEq)]
722    struct TestEdgeData {
723        sequence_handle: usize,
724        forwards: bool,
725        weight: usize,
726        dummy_id: usize,
727    }
728
729    impl DijkstraWeightedEdgeData<usize> for TestEdgeData {
730        fn weight(&self) -> usize {
731            self.weight
732        }
733    }
734
735    impl BidirectedData for TestEdgeData {
736        fn mirror(&self) -> Self {
737            let mut result = self.clone();
738            result.forwards = !result.forwards;
739            result
740        }
741    }
742
743    impl MatchtigEdgeData<usize> for TestEdgeData {
744        fn is_dummy(&self) -> bool {
745            self.dummy_id > 0
746        }
747
748        fn is_forwards(&self) -> bool {
749            self.forwards
750        }
751
752        fn new(sequence_handle: usize, forwards: bool, weight: usize, dummy_id: usize) -> Self {
753            Self {
754                sequence_handle,
755                forwards,
756                weight,
757                dummy_id,
758            }
759        }
760    }
761
762    #[test]
763    fn test_make_graph_eulerian_with_breaking_edges_mirror_nodes() {
764        let mut graph = NodeBigraphWrapper::new(PetGraph::new());
765
766        let nodes: Vec<_> = (0..8).map(|_| graph.add_node(())).collect();
767        graph.set_mirror_nodes(nodes[0], nodes[1]);
768        graph.set_mirror_nodes(nodes[2], nodes[2]);
769        graph.set_mirror_nodes(nodes[3], nodes[3]);
770        graph.set_mirror_nodes(nodes[4], nodes[5]);
771        graph.set_mirror_nodes(nodes[6], nodes[6]);
772        graph.set_mirror_nodes(nodes[7], nodes[7]);
773
774        graph.add_edge(nodes[0], nodes[3], TestEdgeData::new(1, true, 0, 1));
775        graph.add_edge(nodes[3], nodes[1], TestEdgeData::new(1, false, 0, 1));
776        graph.add_edge(nodes[2], nodes[0], TestEdgeData::new(2, true, 0, 2));
777        graph.add_edge(nodes[1], nodes[2], TestEdgeData::new(2, false, 0, 2));
778        graph.add_edge(nodes[6], nodes[4], TestEdgeData::new(3, true, 0, 3));
779        graph.add_edge(nodes[5], nodes[6], TestEdgeData::new(3, false, 0, 3));
780        graph.add_edge(nodes[7], nodes[4], TestEdgeData::new(4, true, 0, 4));
781        graph.add_edge(nodes[5], nodes[7], TestEdgeData::new(4, false, 0, 4));
782
783        make_graph_eulerian_with_breaking_edges(&mut graph, 0, &mut 5, 4);
784        println!("{graph:?}");
785    }
786}