libmatchtigs/implementation/eulertigs/
mod.rs1use crate::implementation::{
2 debug_assert_graph_has_no_consecutive_dummy_edges, make_graph_eulerian_with_breaking_edges,
3 MatchtigEdgeData, RelaxedAtomicBoolVec,
4};
5use crate::TigAlgorithm;
6use genome_graph::bigraph::algo::eulerian::{
7 compute_eulerian_superfluous_out_biedges,
8 compute_minimum_bidirected_eulerian_cycle_decomposition, decomposes_into_eulerian_bicycles,
9 find_non_eulerian_binodes_with_differences,
10};
11use genome_graph::bigraph::interface::dynamic_bigraph::DynamicEdgeCentricBigraph;
12use genome_graph::bigraph::interface::BidirectedData;
13use genome_graph::bigraph::traitgraph::index::{GraphIndex, OptionalGraphIndex};
14use genome_graph::bigraph::traitgraph::interface::GraphBase;
15use genome_graph::bigraph::traitgraph::walks::{EdgeWalk, VecEdgeWalk};
16use log::{error, info, warn};
17use std::marker::PhantomData;
18
19#[derive(Default)]
21pub struct EulertigAlgorithm<SequenceHandle> {
22 _phantom_data: PhantomData<SequenceHandle>,
23}
24
25impl<Graph: GraphBase, SequenceHandle: Default + Clone> TigAlgorithm<Graph>
26 for EulertigAlgorithm<SequenceHandle>
27where
28 Graph: DynamicEdgeCentricBigraph,
29 Graph::EdgeData: BidirectedData + Eq + Clone + MatchtigEdgeData<SequenceHandle>,
30{
31 type Configuration = EulertigAlgorithmConfiguration;
32
33 fn compute_tigs(
34 graph: &mut Graph,
35 configuration: &Self::Configuration,
36 ) -> Vec<VecEdgeWalk<Graph>> {
37 compute_eulertigs(graph, configuration)
38 }
39}
40
41pub struct EulertigAlgorithmConfiguration {
43 pub k: usize,
45}
46
47fn compute_eulertigs<
49 NodeIndex: GraphIndex<OptionalNodeIndex>,
50 OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
51 SequenceHandle: Default + Clone,
52 EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
53 Graph: DynamicEdgeCentricBigraph<
54 NodeIndex = NodeIndex,
55 OptionalNodeIndex = OptionalNodeIndex,
56 EdgeData = EdgeData,
57 >,
58>(
59 graph: &mut Graph,
60 configuration: &EulertigAlgorithmConfiguration,
61) -> Vec<VecEdgeWalk<Graph>> {
62 let k = configuration.k;
63
64 info!("Collecting nodes with missing incoming or outgoing edges");
65 let mut out_nodes = Vec::new(); let mut in_node_count = 0;
67 let in_node_map = RelaxedAtomicBoolVec::new(graph.node_count());
68 let mut node_multiplicities = vec![0; graph.node_count()];
69 let mut unbalanced_self_mirror_count = 0;
70
71 for node_index in graph.node_indices() {
72 let diff = compute_eulerian_superfluous_out_biedges(graph, node_index);
73 if graph.is_self_mirror_node(node_index) && diff != 0 {
74 in_node_count += 1;
75 in_node_map.set(node_index.as_usize(), true);
76 node_multiplicities[node_index.as_usize()] = diff;
77 out_nodes.push(node_index);
78 unbalanced_self_mirror_count += 1;
79 } else if diff > 0 {
80 in_node_count += 1;
81 in_node_map.set(node_index.as_usize(), true);
82 node_multiplicities[node_index.as_usize()] = diff;
83 } else if diff < 0 {
84 out_nodes.push(node_index);
85 node_multiplicities[node_index.as_usize()] = diff;
86 }
87 }
88
89 info!(
90 "Found {} nodes with missing outgoing edges",
91 out_nodes.len()
92 );
93 info!("Found {} nodes with missing incoming edges", in_node_count);
94 info!(
95 "Of those there are {} self-mirrors",
96 unbalanced_self_mirror_count
97 );
98
99 info!("Making graph Eulerian by adding breaking dummy edges");
100 let dummy_sequence = SequenceHandle::default();
101 let mut dummy_edge_id = 0;
102 make_graph_eulerian_with_breaking_edges(graph, dummy_sequence, &mut dummy_edge_id, k);
103
104 if !decomposes_into_eulerian_bicycles(graph) {
106 let non_eulerian_nodes_and_differences = find_non_eulerian_binodes_with_differences(graph);
107 error!(
108 "Failed to make the graph Eulerian. Non-Eulerian nodes and differences:\n{:?}",
109 non_eulerian_nodes_and_differences
110 );
111 panic!("Failed to make the graph Eulerian.");
112 }
113 debug_assert!(graph.verify_node_pairing());
114 debug_assert!(graph.verify_edge_mirror_property());
115 debug_assert_graph_has_no_consecutive_dummy_edges(graph, k);
116
117 info!("Finding Eulerian bicycle");
118 let mut eulerian_cycles = compute_minimum_bidirected_eulerian_cycle_decomposition(graph);
120 info!("Found {} Eulerian bicycles", eulerian_cycles.len());
121
122 info!("Breaking Eulerian bicycles at expensive temporary edges");
123 let mut eulertigs = Vec::new();
124
125 let mut removed_edges = 0;
126 for eulerian_cycle in &mut eulerian_cycles {
127 info!(
128 "Processing Eulerian bicycle with {} biedges",
129 eulerian_cycle.len()
130 );
131 debug_assert!(eulerian_cycle.is_circular_walk(graph));
132
133 let mut longest_dummy_weight = 0;
135 let mut longest_dummy_index = 0;
136 for (index, &edge) in eulerian_cycle.iter().enumerate() {
137 let edge_data = graph.edge_data(edge);
138 if edge_data.is_dummy() && edge_data.weight() > longest_dummy_weight {
139 longest_dummy_weight = edge_data.weight();
140 longest_dummy_index = index;
141 }
142 }
143 if longest_dummy_weight > 0 {
144 eulerian_cycle.rotate_left(longest_dummy_index);
145 }
146
147 let mut offset = 0;
148 let mut last_edge_is_dummy = false;
149 for (current_cycle_index, ¤t_edge_index) in eulerian_cycle.iter().enumerate() {
150 let edge_data = graph.edge_data(current_edge_index);
151
152 if edge_data.is_original() {
153 last_edge_is_dummy = false;
154 } else {
155 if last_edge_is_dummy {
156 warn!(
157 "Found consecutive dummy edges at {}",
158 current_edge_index.as_usize()
159 );
160 }
161 last_edge_is_dummy = true;
162 }
163
164 if (edge_data.weight() >= k && edge_data.is_dummy())
165 || (edge_data.is_dummy() && current_cycle_index == 0)
166 {
167 if offset < current_cycle_index {
168 eulertigs.push(eulerian_cycle[offset..current_cycle_index].to_owned());
169 } else if current_cycle_index > 0 {
170 warn!("Found consecutive breaking edges");
171 }
172 offset = current_cycle_index + 1;
173 removed_edges += 1;
174 }
175 }
176 if offset < eulerian_cycle.len() {
177 if graph
178 .edge_data(*eulerian_cycle.last().unwrap())
179 .is_original()
180 {
181 eulertigs.push(eulerian_cycle[offset..eulerian_cycle.len()].to_owned());
182 } else if offset < eulerian_cycle.len() - 1 {
183 eulertigs.push(eulerian_cycle[offset..eulerian_cycle.len() - 1].to_owned());
184 }
185 }
186 }
187
188 info!("Found {} expensive temporary edges", removed_edges);
189 info!("Found {} eulertigs", eulertigs.len());
190
191 for eulertig in &eulertigs {
192 debug_assert!(!eulertig.is_empty());
193 debug_assert!(graph.edge_data(*eulertig.first().unwrap()).is_original());
194 debug_assert!(graph.edge_data(*eulertig.last().unwrap()).is_original());
195 }
196
197 eulertigs
198}