three_edge_connected/
graph.rs

1use std::io::prelude::*;
2
3use fxhash::FxHashMap;
4
5use bstr::io::*;
6
7use gfa::parser::{GFAParser, GFAParserBuilder};
8
9pub type AdjacencyList = Vec<usize>;
10pub type FxMapGraph = FxHashMap<usize, AdjacencyList>;
11
12/// An adjacency list representation of a generic graph, including the
13/// map required to go from node index to the original node name. The
14/// `N` type parameter is the node name in the original graph, e.g.
15/// `BString` for GFA graphs, or `usize` for graphs that use integer
16/// names.
17pub struct Graph<N> {
18    pub graph: FxMapGraph,
19    pub inv_names: Vec<N>,
20}
21
22impl Graph<usize> {
23    /// Construct an adjacency graph from an iterator over the edges
24    /// of an existing graph. Both the input and output have `usize`
25    /// node IDs, but `from_edges` performs a transformation to ensure
26    /// all the node IDs are consecutive starting from 0.
27    pub fn from_edges<I>(input: I) -> Graph<usize>
28    where
29        I: Iterator<Item = (usize, usize)>,
30    {
31        let mut graph: FxHashMap<usize, AdjacencyList> = FxHashMap::default();
32        let mut name_map: FxHashMap<usize, usize> = FxHashMap::default();
33        let mut inv_names = Vec::new();
34
35        let mut get_ix = |name: usize| {
36            if let Some(ix) = name_map.get(&name) {
37                *ix
38            } else {
39                let ix = name_map.len();
40                name_map.insert(name, ix);
41                inv_names.push(name);
42                ix
43            }
44        };
45
46        for (from, to) in input {
47            let from_ix = get_ix(from);
48            let to_ix = get_ix(to);
49
50            graph.entry(from_ix).or_default().push(to_ix);
51            graph.entry(to_ix).or_default().push(from_ix);
52        }
53
54        Graph { graph, inv_names }
55    }
56}
57
58impl Graph<Vec<u8>> {
59    /// Constructs an adjacency list representation of the given GFA
60    /// file input stream, parsing the GFA line-by-line and only
61    /// keeping the links. Returns the graph as an adjacency list and
62    /// a map from graph indices to GFA segment names.
63    pub fn from_gfa_reader<T: BufRead>(reader: &mut T) -> Graph<Vec<u8>> {
64        let lines = &mut reader.byte_lines();
65
66        let parser: GFAParser<Vec<u8>, ()> = GFAParserBuilder {
67            links: true,
68            ..GFAParserBuilder::none()
69        }
70        .build();
71
72        let gfa_lines =
73            lines.filter_map(move |l| parser.parse_gfa_line(&l.unwrap()).ok());
74
75        let mut graph: FxHashMap<usize, AdjacencyList> = FxHashMap::default();
76        let mut name_map: FxHashMap<Vec<u8>, usize> = FxHashMap::default();
77        let mut inv_names = Vec::new();
78
79        let mut get_ix = |name: &[u8]| {
80            if let Some(ix) = name_map.get(name) {
81                *ix
82            } else {
83                let ix = name_map.len();
84                name_map.insert(name.into(), ix);
85                inv_names.push(name.into());
86                ix
87            }
88        };
89
90        for line in gfa_lines {
91            if let gfa::gfa::Line::Link(link) = line {
92                let from_ix = get_ix(link.from_segment.as_ref());
93                let to_ix = get_ix(link.to_segment.as_ref());
94
95                graph.entry(from_ix).or_default().push(to_ix);
96                graph.entry(to_ix).or_default().push(from_ix);
97            }
98        }
99
100        Graph { graph, inv_names }
101    }
102}
103
104impl<N: Clone> Graph<N> {
105    /// Given a vector of graph components (as produced by
106    pub fn invert_components(
107        &self,
108        components: Vec<Vec<usize>>,
109    ) -> Vec<Vec<N>> {
110        components.into_iter().filter_map(|c|{
111            let len = c.len();
112            if len > 1 {
113                let names: Vec<_> = c.iter()
114                    .filter_map(|j| self.inv_names.get(*j))
115                    .cloned()
116                    .collect();
117
118                assert_eq!(len,
119                           names.len(),
120                           "Could not find inverse name when inverting graph components");
121                Some(names)
122            } else {
123                None
124            }
125        }).collect()
126    }
127}