genome_graph/generic/
mod.rs

1use bigraph::interface::dynamic_bigraph::DynamicEdgeCentricBigraph;
2use bigraph::interface::BidirectedData;
3use bigraph::traitgraph::index::GraphIndex;
4use bigraph::traitgraph::interface::GraphBase;
5use std::fmt::Formatter;
6
7pub(crate) enum MappedNode<Graph: GraphBase> {
8    Unmapped,
9    Normal {
10        forward: Graph::NodeIndex,
11        backward: Graph::NodeIndex,
12    },
13    SelfMirror(Graph::NodeIndex),
14}
15
16impl<Graph: GraphBase> MappedNode<Graph> {
17    pub(crate) fn mirror(self) -> Self {
18        match self {
19            MappedNode::Unmapped => MappedNode::Unmapped,
20            MappedNode::Normal { forward, backward } => MappedNode::Normal {
21                forward: backward,
22                backward: forward,
23            },
24            MappedNode::SelfMirror(node) => MappedNode::SelfMirror(node),
25        }
26    }
27}
28
29impl<Graph: GraphBase> std::fmt::Debug for MappedNode<Graph> {
30    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
31        match self {
32            MappedNode::Unmapped => write!(f, "unmapped"),
33            MappedNode::Normal { forward, backward } => {
34                write!(f, "({}, {})", forward.as_usize(), backward.as_usize())
35            }
36            MappedNode::SelfMirror(node) => write!(f, "{}", node.as_usize()),
37        }
38    }
39}
40
41impl<Graph: GraphBase> Clone for MappedNode<Graph> {
42    fn clone(&self) -> Self {
43        *self
44    }
45}
46
47impl<Graph: GraphBase> Copy for MappedNode<Graph> {}
48
49impl<Graph: GraphBase> PartialEq for MappedNode<Graph> {
50    fn eq(&self, other: &Self) -> bool {
51        match (self, other) {
52            (MappedNode::Unmapped, MappedNode::Unmapped) => true,
53            (
54                MappedNode::Normal {
55                    forward: f1,
56                    backward: b1,
57                },
58                MappedNode::Normal {
59                    forward: f2,
60                    backward: b2,
61                },
62            ) => f1 == f2 && b1 == b2,
63            (MappedNode::SelfMirror(n1), MappedNode::SelfMirror(n2)) => n1 == n2,
64            _ => false,
65        }
66    }
67}
68
69impl<Graph: GraphBase> Eq for MappedNode<Graph> {}
70
71/// A node representing a unitig with the edges of a bidirected de Bruijn graph, inspired by bcalm2's fasta format.
72pub trait GenericNode {
73    /// A unique identifier of this node.
74    /// The identifiers of the nodes need to be numbered consecutively starting from 0.
75    fn id(&self) -> usize;
76
77    /// Return true if this node is self-complemental, i.e. if the reverse complement of the first k-1 characters equals the last k-1 characters.
78    fn is_self_complemental(&self) -> bool;
79
80    /// Return an iterator over the edges of the node.
81    /// It is enough to return the edges that also bcalm2 would return.
82    fn edges(&self) -> impl Iterator<Item = GenericEdge>;
83}
84
85/// An edge representing a k-1 overlap between unitigs.
86///
87/// Terminology: the edge goes from "tail" to "head".
88#[derive(Debug, Clone, Copy, Eq, PartialEq)]
89pub struct GenericEdge {
90    /// The direction of the unitig at the tail of the edge.
91    pub from_side: bool,
92    /// The id of the unitig at the head of the edge.
93    pub to_node: usize,
94    /// The direction of the unitig at the head of the edge.
95    pub to_side: bool,
96}
97
98/// Read a genome graph in bcalm2 fasta format into an edge-centric representation.
99pub fn convert_generic_node_centric_bigraph_to_edge_centric<
100    GenomeSequenceStoreHandle,
101    NodeData: Default + Clone,
102    InputEdgeData: GenericNode,
103    OutputEdgeData: From<InputEdgeData> + Clone + Eq + BidirectedData,
104    Graph: DynamicEdgeCentricBigraph<NodeData = NodeData, EdgeData = OutputEdgeData> + Default,
105>(
106    reader: impl IntoIterator<Item = InputEdgeData>,
107) -> crate::error::Result<Graph>
108where
109    <Graph as GraphBase>::NodeIndex: Clone,
110{
111    let mut node_map: Vec<MappedNode<Graph>> = Vec::new();
112    let mut graph = Graph::default();
113
114    for generic_node in reader.into_iter() {
115        let edge_is_self_mirror = generic_node.is_self_complemental();
116
117        let n1 = generic_node.id() * 2;
118        let n2 = generic_node.id() * 2 + 1;
119
120        let n1_is_self_mirror = generic_node.edges().any(|edge| {
121            edge == GenericEdge {
122                from_side: false,
123                to_node: generic_node.id(),
124                to_side: true,
125            }
126        });
127        let n2_is_self_mirror = generic_node.edges().any(|edge| {
128            edge == GenericEdge {
129                from_side: true,
130                to_node: generic_node.id(),
131                to_side: false,
132            }
133        });
134
135        if node_map.len() <= n2 {
136            node_map.resize(n2 + 1, MappedNode::Unmapped);
137        }
138
139        // If the record has no known incoming binode yet
140        if node_map[n1] == MappedNode::Unmapped {
141            let mut assign_to_neighbors = false;
142
143            // If the record has no known incoming binode yet, first search if one of the neighbors exist
144            for edge in generic_node
145                .edges()
146                // Incoming edges to n1 are outgoing on its reverse complement
147                .filter(|edge| !edge.from_side)
148            {
149                // Location of the to_node of the edge in the node_map
150                let to_node = edge.to_node * 2 + if edge.to_side { 0 } else { 1 };
151
152                if node_map.len() <= to_node {
153                    node_map.resize(to_node + 1, MappedNode::Unmapped);
154                }
155                if node_map[to_node] != MappedNode::Unmapped {
156                    node_map[n1] = if !edge.to_side {
157                        node_map[to_node]
158                    } else {
159                        // If the edge changes sides, the node is mirrored
160                        node_map[to_node].mirror()
161                    };
162                    assign_to_neighbors = true;
163                    break;
164                }
165            }
166
167            // If no neighbor was found, create a new binode and also assign it to the neighbors
168            if node_map[n1] == MappedNode::Unmapped {
169                if n1_is_self_mirror {
170                    let n1s = graph.add_node(NodeData::default());
171                    graph.set_mirror_nodes(n1s, n1s);
172                    node_map[n1] = MappedNode::SelfMirror(n1s);
173                } else {
174                    let n1f = graph.add_node(NodeData::default());
175                    let n1r = graph.add_node(NodeData::default());
176                    graph.set_mirror_nodes(n1f, n1r);
177                    node_map[n1] = MappedNode::Normal {
178                        forward: n1f,
179                        backward: n1r,
180                    };
181                }
182                assign_to_neighbors = true;
183            }
184
185            if assign_to_neighbors {
186                // Assign the new node also to the neighbors
187                for edge in generic_node
188                    .edges()
189                    // Incoming edges to n1 are outgoing on its reverse complement
190                    .filter(|edge| !edge.from_side)
191                {
192                    // Location of the to_node of the edge in the node_map
193                    let to_node = edge.to_node * 2 + if edge.to_side { 0 } else { 1 };
194                    node_map[to_node] = if !edge.to_side {
195                        node_map[n1]
196                    } else {
197                        // If the edge changes sides, the node is mirrored
198                        node_map[n1].mirror()
199                    };
200                }
201            }
202        }
203
204        // If the record has no known outgoing binode yet
205        if node_map[n2] == MappedNode::Unmapped {
206            let mut assign_to_neighbors = false;
207
208            if edge_is_self_mirror {
209                node_map[n2] = node_map[n1].mirror();
210                // not sure if needed, but should be rare enough that it is not worth to think about it (and it is correct like this as well)
211                assign_to_neighbors = true;
212            } else {
213                // If the record has no known outgoing binode yet, first search if one of the neighbors exist
214                for edge in generic_node
215                    .edges()
216                    // Outgoing edges from n1 are outgoing from its forward variant
217                    .filter(|edge| edge.from_side)
218                {
219                    // Location of the to_node of the edge in the node_map
220                    let to_node = edge.to_node * 2 + if edge.to_side { 0 } else { 1 };
221
222                    if node_map.len() <= to_node {
223                        node_map.resize(to_node + 1, MappedNode::Unmapped);
224                    }
225                    if node_map[to_node] != MappedNode::Unmapped {
226                        node_map[n2] = if edge.to_side {
227                            node_map[to_node]
228                        } else {
229                            // If the edge changes sides, the node is mirrored
230                            node_map[to_node].mirror()
231                        };
232                        assign_to_neighbors = true;
233                        break;
234                    }
235                }
236
237                // If no neighbor was found, create a new binode and also assign it to the neighbors
238                if node_map[n2] == MappedNode::Unmapped {
239                    if n2_is_self_mirror {
240                        let n2s = graph.add_node(NodeData::default());
241                        graph.set_mirror_nodes(n2s, n2s);
242                        node_map[n2] = MappedNode::SelfMirror(n2s);
243                    } else {
244                        let n2f = graph.add_node(NodeData::default());
245                        let n2r = graph.add_node(NodeData::default());
246                        graph.set_mirror_nodes(n2f, n2r);
247                        node_map[n2] = MappedNode::Normal {
248                            forward: n2f,
249                            backward: n2r,
250                        };
251                    }
252                    assign_to_neighbors = true;
253                }
254            }
255
256            if assign_to_neighbors {
257                // Assign the new node also to the neighbors
258                for edge in generic_node
259                    .edges()
260                    // Outgoing edges from n1 are outgoing from its forward variant
261                    .filter(|edge| edge.from_side)
262                {
263                    // Location of the to_node of the edge in the node_map
264                    let to_node = edge.to_node * 2 + if edge.to_side { 0 } else { 1 };
265                    node_map[to_node] = if edge.to_side {
266                        node_map[n2]
267                    } else {
268                        // If the edge changes sides, the node is mirrored
269                        node_map[n2].mirror()
270                    };
271                }
272            }
273        }
274
275        debug_assert_ne!(node_map[n1], MappedNode::Unmapped);
276        debug_assert_ne!(node_map[n2], MappedNode::Unmapped);
277
278        let (n1f, n1r) = match node_map[n1] {
279            MappedNode::Unmapped => unreachable!(),
280            MappedNode::Normal { forward, backward } => (forward, backward),
281            MappedNode::SelfMirror(node) => (node, node),
282        };
283        let (n2f, n2r) = match node_map[n2] {
284            MappedNode::Unmapped => unreachable!(),
285            MappedNode::Normal { forward, backward } => (forward, backward),
286            MappedNode::SelfMirror(node) => (node, node),
287        };
288
289        let edge_data: OutputEdgeData = generic_node.into();
290        graph.add_edge(n1f, n2f, edge_data.clone());
291        graph.add_edge(n2r, n1r, edge_data.mirror());
292    }
293
294    Ok(graph)
295}