lorikeet_genome/graphs/
path.rs

1use gkl::smithwaterman::OverhangStrategy;
2use ordered_float::OrderedFloat;
3use petgraph::stable_graph::{EdgeIndex, NodeIndex};
4use rust_htslib::bam::record::CigarString;
5use std::cmp::Ordering;
6use std::fmt::Debug;
7use std::hash::Hash;
8
9use crate::graphs::base_edge::BaseEdge;
10use crate::graphs::base_graph::BaseGraph;
11use crate::graphs::base_vertex::BaseVertex;
12use crate::pair_hmm::pair_hmm_likelihood_calculation_engine::AVXMode;
13use crate::reads::cigar_utils::CigarUtils;
14use crate::smith_waterman::smith_waterman_aligner::NEW_SW_PARAMETERS;
15use crate::utils::base_utils::BaseUtils;
16
17/**
18 * A path thought a BaseGraph
19 *
20 * class to keep track of paths
21 *
22 */
23#[derive(Debug, Clone, Hash)]
24pub struct Path {
25    pub(crate) last_vertex: NodeIndex,
26    pub(crate) edges_in_order: Vec<EdgeIndex>,
27}
28
29impl Path {
30    /**
31     * Create a new Path containing no edges and starting at initialVertex
32     * @param initialVertex the starting vertex of the path
33     * @param graph the graph this path will follow through
34     */
35    pub fn new(last_vertex: NodeIndex, edges_in_order: Vec<EdgeIndex>) -> Self {
36        Self {
37            last_vertex,
38            edges_in_order,
39        }
40    }
41
42    /**
43     * Create a new Path extending p with edge
44     *
45     * @param p the path to extend.
46     * @param edge the edge to extend path with.
47     *
48     * @throws IllegalArgumentException if {@code p} or {@code edge} are {@code null}, or {@code edge} is
49     * not part of {@code p}'s graph, or {@code edge} does not have as a source the last vertex in {@code p}.
50     */
51    pub fn new_add_edge<V: BaseVertex, E: BaseEdge>(
52        &self,
53        edge: EdgeIndex,
54        graph: &BaseGraph<V, E>,
55    ) -> Path {
56        assert!(
57            graph.contains_edge(edge),
58            "Graph must contain edge {:?} but it doesn't",
59            edge
60        );
61        assert_eq!(
62            graph.get_edge_source(edge),
63            self.last_vertex,
64            "Edges added to path must be contiguous."
65        );
66        let mut edges_in_order = self.edges_in_order.clone();
67        edges_in_order.push(edge);
68        Path {
69            last_vertex: graph.get_edge_target(edge),
70            edges_in_order,
71        }
72    }
73
74    /// Creates a new path continuing on from #[self] that prepends the given edge index to
75    /// the beginning of the path, checking that the edge is contiguous with the rest of the path
76    pub fn new_prepend_edge<V: BaseVertex, E: BaseEdge>(
77        &self,
78        edge: EdgeIndex,
79        graph: &BaseGraph<V, E>,
80    ) -> Path {
81        assert!(
82            graph.contains_edge(edge),
83            "Graph must contain edge {:?} but it doesn't",
84            edge
85        );
86        assert_eq!(
87            graph.get_edge_target(edge),
88            self.get_first_vertex(graph),
89            "Edges added to path must be contiguous."
90        );
91
92        let mut edges_in_order = vec![edge];
93
94        edges_in_order.extend(self.edges_in_order.iter());
95        Self {
96            last_vertex: self.last_vertex,
97            edges_in_order,
98        }
99    }
100
101    /**
102     * Create a new Path extending p with edge
103     *
104     * @param p the path to extend.
105     * @param edges list of edges to extend. Does not check arguments' validity i.e. doesn't check that edges are in order
106     *
107     * @throws IllegalArgumentException if {@code p} or {@code edges} are {@code null} or empty, or {@code edges} is
108     * not part of {@code p}'s graph, or {@code edges} does not have as a source the last vertex in {@code p}.
109     */
110    pub fn new_add_edges<V: BaseVertex, E: BaseEdge>(
111        &self,
112        edges: Vec<EdgeIndex>,
113        graph: &BaseGraph<V, E>,
114    ) -> Path {
115        assert!(
116            edges.iter().all(|edge| graph.contains_edge(*edge)),
117            "Graph does not contain an edge that attempted to be added to the path"
118        );
119
120        let mut tmp_vertex = self.last_vertex;
121        for edge in edges.iter() {
122            if graph.get_edge_source(*edge) != tmp_vertex {
123                panic!("Edges added to the path must be contiguous")
124            };
125            tmp_vertex = graph.get_edge_target(*edge)
126        }
127        let mut edges_in_order = self.edges_in_order.clone();
128        edges_in_order.extend(edges);
129        Path {
130            last_vertex: tmp_vertex,
131            edges_in_order,
132        }
133    }
134
135    // fn paths_are_the_same(&self, path: &Self) -> bool {
136    //     self.edges_in_order == path.edges_in_order
137    // }
138
139    /**
140     * Does this path contain the given vertex?
141     *
142     * @param v a non-null vertex
143     * @return true if v occurs within this path, false otherwise
144     */
145    pub fn contains_vertex<V: BaseVertex, E: BaseEdge>(
146        &self,
147        v: NodeIndex,
148        graph: &BaseGraph<V, E>,
149    ) -> bool {
150        v == self.get_first_vertex(graph)
151            || self
152                .edges_in_order
153                .iter()
154                .map(|e| graph.graph.edge_endpoints(*e).unwrap().1)
155                .any(|edge_target| edge_target == v)
156    }
157
158    pub fn to_string<V: BaseVertex, E: BaseEdge>(&self, graph: &BaseGraph<V, E>) -> String {
159        let joined_path = self
160            .get_vertices(graph)
161            .iter()
162            .map(|v| {
163                std::str::from_utf8(graph.graph.node_weight(*v).unwrap().get_sequence()).unwrap()
164            })
165            .collect::<Vec<&str>>()
166            .join("->");
167
168        return format!("Path{{path={}}}", joined_path);
169    }
170
171    /**
172     * Get the edges of this path in order.
173     * Returns an unmodifiable view of the underlying list
174     * @return a non-null list of edges
175     */
176    pub fn get_edges(&self) -> &Vec<EdgeIndex> {
177        &self.edges_in_order
178    }
179
180    pub fn get_last_edge(&self) -> EdgeIndex {
181        *self.edges_in_order.last().unwrap()
182    }
183
184    /**
185     * Get the list of vertices in this path in order defined by the edges of the path
186     * @return a non-null, non-empty list of vertices
187     */
188    pub fn get_vertices<V: BaseVertex, E: BaseEdge>(
189        &self,
190        graph: &BaseGraph<V, E>,
191    ) -> Vec<NodeIndex> {
192        let mut result = Vec::with_capacity(self.edges_in_order.len() + 1);
193        result.push(self.get_first_vertex(graph));
194        result.extend(
195            self.edges_in_order
196                .iter()
197                .map(|e| graph.graph.edge_endpoints(*e).unwrap().1)
198                .collect::<Vec<NodeIndex>>(),
199        );
200        return result;
201    }
202
203    /**
204     * Get the first vertex in this path
205     * @return a non-null vertex
206     */
207    pub fn get_first_vertex<V: BaseVertex, E: BaseEdge>(
208        &self,
209        graph: &BaseGraph<V, E>,
210    ) -> NodeIndex {
211        if self.edges_in_order.is_empty() {
212            return self.last_vertex;
213        } else {
214            return graph
215                .graph
216                .edge_endpoints(self.edges_in_order[0])
217                .unwrap()
218                .0;
219        }
220    }
221
222    /**
223     * Get the final vertex of the path
224     * @return a non-null vertex
225     */
226    pub fn get_last_vertex(&self) -> NodeIndex {
227        self.last_vertex
228    }
229
230    /**
231     * The base sequence for this path. Pull the full sequence for source nodes and then the suffix for all subsequent nodes
232     * @return  non-null sequence of bases corresponding to this path
233     */
234    pub fn get_bases<V: BaseVertex, E: BaseEdge>(&self, graph: &BaseGraph<V, E>) -> Vec<u8> {
235        if self.edges_in_order.is_empty() {
236            return graph
237                .graph
238                .node_weight(self.last_vertex)
239                .unwrap()
240                .get_additional_sequence(true)
241                .to_vec();
242        }
243
244        let mut bases = graph
245            .graph
246            .node_weight(
247                graph
248                    .graph
249                    .edge_endpoints(self.edges_in_order[0])
250                    .unwrap()
251                    .0,
252            )
253            .unwrap()
254            .get_additional_sequence(true)
255            .to_vec();
256        for e in self.edges_in_order.iter() {
257            bases.extend(
258                graph
259                    .graph
260                    .node_weight(graph.graph.edge_endpoints(*e).unwrap().1)
261                    .unwrap()
262                    .get_additional_sequence(true),
263            );
264        }
265
266        return bases;
267    }
268
269    pub fn get_max_multiplicity<V: BaseVertex, E: BaseEdge>(
270        &self,
271        graph: &BaseGraph<V, E>,
272    ) -> usize {
273        self.get_edges()
274            .iter()
275            .map(|e| graph.graph.edge_weight(*e).unwrap().get_multiplicity())
276            .max()
277            .unwrap_or_default()
278    }
279
280    /**
281     * Calculate the cigar elements for this path against the reference sequence
282     *
283     * @param refSeq the reference sequence that all of the bases in this path should align to
284     * @param aligner
285     * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found
286     */
287    pub fn calculate_cigar<V: BaseVertex, E: BaseEdge>(
288        &self,
289        ref_seq: &[u8],
290        graph: &BaseGraph<V, E>,
291        avx_mode: AVXMode,
292    ) -> CigarString {
293        return CigarUtils::calculate_cigar(
294            ref_seq,
295            &self.get_bases(graph),
296            OverhangStrategy::SoftClip,
297            &*NEW_SW_PARAMETERS,
298            avx_mode,
299        )
300        .unwrap();
301    }
302
303    /**
304     * Length of the path in edges.
305     *
306     * @return {@code 0} or greater.
307     */
308    pub fn len(&self) -> usize {
309        self.edges_in_order.len()
310    }
311}
312
313impl PartialEq for Path {
314    fn eq(&self, other: &Self) -> bool {
315        self.edges_in_order == other.edges_in_order
316    }
317}
318
319// `Eq` needs to be implemented as well.
320impl Eq for Path {}
321
322// impl Hash for Path {
323//     fn hash<H: Hasher>(&self, state: &mut H) {
324//         self.last_vertex.hash(state)
325//     }
326// }
327
328#[derive(Debug, Eq, PartialEq)]
329pub struct Chain<'a, V: BaseVertex + Sync + std::fmt::Debug, E: BaseEdge + Sync + std::fmt::Debug> {
330    pub log_odds: OrderedFloat<f64>,
331    pub path: &'a Path,
332    pub graph: &'a BaseGraph<V, E>,
333}
334
335impl<'a, V: BaseVertex + Sync + std::fmt::Debug, E: BaseEdge + Sync + std::fmt::Debug>
336    Chain<'a, V, E>
337{
338    pub fn new(
339        log_odds: OrderedFloat<f64>,
340        path: &'a Path,
341        graph: &'a BaseGraph<V, E>,
342    ) -> Chain<'a, V, E> {
343        Chain {
344            log_odds,
345            path,
346            graph,
347        }
348    }
349}
350
351impl<'a, V: BaseVertex + Sync + std::fmt::Debug, E: BaseEdge + Sync + std::fmt::Debug> Ord
352    for Chain<'a, V, E>
353{
354    fn cmp(&self, other: &Self) -> Ordering {
355        (-self.log_odds).cmp(&-other.log_odds).then_with(|| {
356            BaseUtils::bases_comparator(
357                self.graph
358                    .get_sequence_from_index(self.path.get_first_vertex(self.graph)),
359                other
360                    .graph
361                    .get_sequence_from_index(other.path.get_first_vertex(other.graph)),
362            )
363        })
364    }
365}
366
367impl<'a, V: BaseVertex, E: BaseEdge> PartialOrd for Chain<'a, V, E> {
368    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
369        Some(self.cmp(other))
370    }
371}