1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use std::io::prelude::*;

use fxhash::FxHashMap;

use bstr::io::*;

use gfa::parser::{GFAParser, GFAParserBuilder};

pub type AdjacencyList = Vec<usize>;
pub type FxMapGraph = FxHashMap<usize, AdjacencyList>;

/// An adjacency list representation of a generic graph, including the
/// map required to go from node index to the original node name. The
/// `N` type parameter is the node name in the original graph, e.g.
/// `BString` for GFA graphs, or `usize` for graphs that use integer
/// names.
pub struct Graph<N> {
    pub graph: FxMapGraph,
    pub inv_names: Vec<N>,
}

impl Graph<usize> {
    /// Construct an adjacency graph from an iterator over the edges
    /// of an existing graph. Both the input and output have `usize`
    /// node IDs, but `from_edges` performs a transformation to ensure
    /// all the node IDs are consecutive starting from 0.
    pub fn from_edges<I>(input: I) -> Graph<usize>
    where
        I: Iterator<Item = (usize, usize)>,
    {
        let mut graph: FxHashMap<usize, AdjacencyList> = FxHashMap::default();
        let mut name_map: FxHashMap<usize, usize> = FxHashMap::default();
        let mut inv_names = Vec::new();

        let mut get_ix = |name: usize| {
            if let Some(ix) = name_map.get(&name) {
                *ix
            } else {
                let ix = name_map.len();
                name_map.insert(name, ix);
                inv_names.push(name);
                ix
            }
        };

        for (from, to) in input {
            let from_ix = get_ix(from);
            let to_ix = get_ix(to);

            graph.entry(from_ix).or_default().push(to_ix);
            graph.entry(to_ix).or_default().push(from_ix);
        }

        Graph { graph, inv_names }
    }
}

impl Graph<Vec<u8>> {
    /// Constructs an adjacency list representation of the given GFA
    /// file input stream, parsing the GFA line-by-line and only
    /// keeping the links. Returns the graph as an adjacency list and
    /// a map from graph indices to GFA segment names.
    pub fn from_gfa_reader<T: BufRead>(reader: &mut T) -> Graph<Vec<u8>> {
        let lines = &mut reader.byte_lines();

        let parser: GFAParser<Vec<u8>, ()> = GFAParserBuilder {
            links: true,
            ..GFAParserBuilder::none()
        }
        .build();

        let gfa_lines =
            lines.filter_map(move |l| parser.parse_gfa_line(&l.unwrap()).ok());

        let mut graph: FxHashMap<usize, AdjacencyList> = FxHashMap::default();
        let mut name_map: FxHashMap<Vec<u8>, usize> = FxHashMap::default();
        let mut inv_names = Vec::new();

        let mut get_ix = |name: &[u8]| {
            if let Some(ix) = name_map.get(name) {
                *ix
            } else {
                let ix = name_map.len();
                name_map.insert(name.into(), ix);
                inv_names.push(name.into());
                ix
            }
        };

        for line in gfa_lines {
            if let gfa::gfa::Line::Link(link) = line {
                let from_ix = get_ix(link.from_segment.as_ref());
                let to_ix = get_ix(link.to_segment.as_ref());

                graph.entry(from_ix).or_default().push(to_ix);
                graph.entry(to_ix).or_default().push(from_ix);
            }
        }

        Graph { graph, inv_names }
    }
}

impl<N: Clone> Graph<N> {
    /// Given a vector of graph components (as produced by
    pub fn invert_components(
        &self,
        components: Vec<Vec<usize>>,
    ) -> Vec<Vec<N>> {
        components.into_iter().filter_map(|c|{
            let len = c.len();
            if len > 1 {
                let names: Vec<_> = c.iter()
                    .filter_map(|j| self.inv_names.get(*j))
                    .cloned()
                    .collect();

                assert_eq!(len,
                           names.len(),
                           "Could not find inverse name when inverting graph components");
                Some(names)
            } else {
                None
            }
        }).collect()
    }
}