libmatchtigs/implementation/eulertigs/
mod.rs

1use crate::implementation::{
2    debug_assert_graph_has_no_consecutive_dummy_edges, make_graph_eulerian_with_breaking_edges,
3    MatchtigEdgeData, RelaxedAtomicBoolVec,
4};
5use crate::TigAlgorithm;
6use genome_graph::bigraph::algo::eulerian::{
7    compute_eulerian_superfluous_out_biedges,
8    compute_minimum_bidirected_eulerian_cycle_decomposition, decomposes_into_eulerian_bicycles,
9    find_non_eulerian_binodes_with_differences,
10};
11use genome_graph::bigraph::interface::dynamic_bigraph::DynamicEdgeCentricBigraph;
12use genome_graph::bigraph::interface::BidirectedData;
13use genome_graph::bigraph::traitgraph::index::{GraphIndex, OptionalGraphIndex};
14use genome_graph::bigraph::traitgraph::interface::GraphBase;
15use genome_graph::bigraph::traitgraph::walks::{EdgeWalk, VecEdgeWalk};
16use log::{error, info, warn};
17use std::marker::PhantomData;
18
19/// The eulertigs algorithm.
20#[derive(Default)]
21pub struct EulertigAlgorithm<SequenceHandle> {
22    _phantom_data: PhantomData<SequenceHandle>,
23}
24
25impl<Graph: GraphBase, SequenceHandle: Default + Clone> TigAlgorithm<Graph>
26    for EulertigAlgorithm<SequenceHandle>
27where
28    Graph: DynamicEdgeCentricBigraph,
29    Graph::EdgeData: BidirectedData + Eq + Clone + MatchtigEdgeData<SequenceHandle>,
30{
31    type Configuration = EulertigAlgorithmConfiguration;
32
33    fn compute_tigs(
34        graph: &mut Graph,
35        configuration: &Self::Configuration,
36    ) -> Vec<VecEdgeWalk<Graph>> {
37        compute_eulertigs(graph, configuration)
38    }
39}
40
41/// The options for the eulertigs algorithm.
42pub struct EulertigAlgorithmConfiguration {
43    /// The k used to build the de Bruijn graph.
44    pub k: usize,
45}
46
47/// Computes eulertigs in the given graph.
48fn compute_eulertigs<
49    NodeIndex: GraphIndex<OptionalNodeIndex>,
50    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
51    SequenceHandle: Default + Clone,
52    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
53    Graph: DynamicEdgeCentricBigraph<
54        NodeIndex = NodeIndex,
55        OptionalNodeIndex = OptionalNodeIndex,
56        EdgeData = EdgeData,
57    >,
58>(
59    graph: &mut Graph,
60    configuration: &EulertigAlgorithmConfiguration,
61) -> Vec<VecEdgeWalk<Graph>> {
62    let k = configuration.k;
63
64    info!("Collecting nodes with missing incoming or outgoing edges");
65    let mut out_nodes = Vec::new(); // Misses outgoing edges
66    let mut in_node_count = 0;
67    let in_node_map = RelaxedAtomicBoolVec::new(graph.node_count());
68    let mut node_multiplicities = vec![0; graph.node_count()];
69    let mut unbalanced_self_mirror_count = 0;
70
71    for node_index in graph.node_indices() {
72        let diff = compute_eulerian_superfluous_out_biedges(graph, node_index);
73        if graph.is_self_mirror_node(node_index) && diff != 0 {
74            in_node_count += 1;
75            in_node_map.set(node_index.as_usize(), true);
76            node_multiplicities[node_index.as_usize()] = diff;
77            out_nodes.push(node_index);
78            unbalanced_self_mirror_count += 1;
79        } else if diff > 0 {
80            in_node_count += 1;
81            in_node_map.set(node_index.as_usize(), true);
82            node_multiplicities[node_index.as_usize()] = diff;
83        } else if diff < 0 {
84            out_nodes.push(node_index);
85            node_multiplicities[node_index.as_usize()] = diff;
86        }
87    }
88
89    info!(
90        "Found {} nodes with missing outgoing edges",
91        out_nodes.len()
92    );
93    info!("Found {} nodes with missing incoming edges", in_node_count);
94    info!(
95        "Of those there are {} self-mirrors",
96        unbalanced_self_mirror_count
97    );
98
99    info!("Making graph Eulerian by adding breaking dummy edges");
100    let dummy_sequence = SequenceHandle::default();
101    let mut dummy_edge_id = 0;
102    make_graph_eulerian_with_breaking_edges(graph, dummy_sequence, &mut dummy_edge_id, k);
103
104    // Check if the graph now really is Eulerian, and if not, output some debug information
105    if !decomposes_into_eulerian_bicycles(graph) {
106        let non_eulerian_nodes_and_differences = find_non_eulerian_binodes_with_differences(graph);
107        error!(
108            "Failed to make the graph Eulerian. Non-Eulerian nodes and differences:\n{:?}",
109            non_eulerian_nodes_and_differences
110        );
111        panic!("Failed to make the graph Eulerian.");
112    }
113    debug_assert!(graph.verify_node_pairing());
114    debug_assert!(graph.verify_edge_mirror_property());
115    debug_assert_graph_has_no_consecutive_dummy_edges(graph, k);
116
117    info!("Finding Eulerian bicycle");
118    //debug!("{:?}", graph);
119    let mut eulerian_cycles = compute_minimum_bidirected_eulerian_cycle_decomposition(graph);
120    info!("Found {} Eulerian bicycles", eulerian_cycles.len());
121
122    info!("Breaking Eulerian bicycles at expensive temporary edges");
123    let mut eulertigs = Vec::new();
124
125    let mut removed_edges = 0;
126    for eulerian_cycle in &mut eulerian_cycles {
127        info!(
128            "Processing Eulerian bicycle with {} biedges",
129            eulerian_cycle.len()
130        );
131        debug_assert!(eulerian_cycle.is_circular_walk(graph));
132
133        // Rotate cycle such that longest dummy is first edge
134        let mut longest_dummy_weight = 0;
135        let mut longest_dummy_index = 0;
136        for (index, &edge) in eulerian_cycle.iter().enumerate() {
137            let edge_data = graph.edge_data(edge);
138            if edge_data.is_dummy() && edge_data.weight() > longest_dummy_weight {
139                longest_dummy_weight = edge_data.weight();
140                longest_dummy_index = index;
141            }
142        }
143        if longest_dummy_weight > 0 {
144            eulerian_cycle.rotate_left(longest_dummy_index);
145        }
146
147        let mut offset = 0;
148        let mut last_edge_is_dummy = false;
149        for (current_cycle_index, &current_edge_index) in eulerian_cycle.iter().enumerate() {
150            let edge_data = graph.edge_data(current_edge_index);
151
152            if edge_data.is_original() {
153                last_edge_is_dummy = false;
154            } else {
155                if last_edge_is_dummy {
156                    warn!(
157                        "Found consecutive dummy edges at {}",
158                        current_edge_index.as_usize()
159                    );
160                }
161                last_edge_is_dummy = true;
162            }
163
164            if (edge_data.weight() >= k && edge_data.is_dummy())
165                || (edge_data.is_dummy() && current_cycle_index == 0)
166            {
167                if offset < current_cycle_index {
168                    eulertigs.push(eulerian_cycle[offset..current_cycle_index].to_owned());
169                } else if current_cycle_index > 0 {
170                    warn!("Found consecutive breaking edges");
171                }
172                offset = current_cycle_index + 1;
173                removed_edges += 1;
174            }
175        }
176        if offset < eulerian_cycle.len() {
177            if graph
178                .edge_data(*eulerian_cycle.last().unwrap())
179                .is_original()
180            {
181                eulertigs.push(eulerian_cycle[offset..eulerian_cycle.len()].to_owned());
182            } else if offset < eulerian_cycle.len() - 1 {
183                eulertigs.push(eulerian_cycle[offset..eulerian_cycle.len() - 1].to_owned());
184            }
185        }
186    }
187
188    info!("Found {} expensive temporary edges", removed_edges);
189    info!("Found {} eulertigs", eulertigs.len());
190
191    for eulertig in &eulertigs {
192        debug_assert!(!eulertig.is_empty());
193        debug_assert!(graph.edge_data(*eulertig.first().unwrap()).is_original());
194        debug_assert!(graph.edge_data(*eulertig.last().unwrap()).is_original());
195    }
196
197    eulertigs
198}