lorikeet_genome/graphs/
multi_sample_edge.rs

1use std::cmp::Reverse;
2use std::collections::BinaryHeap;
3use std::hash::{Hash, Hasher};
4
5use crate::graphs::base_edge::BaseEdge;
6
/**
 * Edge for connecting nodes in the graph that tracks some per-sample information.
 *
 * Besides the total multiplicity, this edge accumulates the multiplicity seen in the
 * sample currently being processed and, on every flush, records that value in a
 * bounded min-heap. The heap keeps at most `single_sample_capacity` of the largest
 * flushed values, and the smallest value it retains is reported as the pruning
 * multiplicity. The workflow for using this type is (shown for a capacity of 1):
 *
 * ```text
 * let mut e = MultiSampleEdge::new(is_ref, 1, 1);
 * e.inc_multiplicity(1);                  // total is 2, per sample is 2, max per sample is 1
 * e.get_pruning_multiplicity();           // = 1
 * e.flush_single_sample_multiplicity();   // total is 2, per sample is 0, max per sample is 2
 * e.get_pruning_multiplicity();           // = 2
 * e.inc_multiplicity(3);                  // total is 5, per sample is 3, max per sample is 2
 * e.get_pruning_multiplicity();           // = 2
 * e.flush_single_sample_multiplicity();   // total is 5, per sample is 0, max per sample is 3
 * e.get_pruning_multiplicity();           // = 3
 * ```
 *
 * Note that the hand-written `Hash` and `PartialEq` implementations in this file only
 * look at `reference_path_indexes`, `multiplicity`, `is_ref` and
 * `single_sample_capacity`; the per-sample bookkeeping does not take part in identity.
 */
#[derive(Debug, Clone)]
pub struct MultiSampleEdge {
    /** Multiplicity accumulated since the last flush; reset to 0 by `flush_single_sample_multiplicity`. */
    current_single_sample_multiplicity: usize,
    /** Maximum number of per-sample values kept in `single_sample_multiplicities` after a flush. */
    single_sample_capacity: usize,
    /** Min-heap (via `Reverse`) of retained per-sample multiplicities; its top is the pruning multiplicity. */
    single_sample_multiplicities: BinaryHeap<Reverse<usize>>,
    /** Indexes appended through `add_reference_index`; presumably positions along the reference path (confirm with callers). */
    reference_path_indexes: Vec<usize>,
    /** Total multiplicity of this edge across everything added so far. */
    pub(crate) multiplicity: usize,
    /** Whether this edge is flagged as being part of the reference path. */
    pub(crate) is_ref: bool,
}
36
37impl Hash for MultiSampleEdge {
38    fn hash<H: Hasher>(&self, state: &mut H) {
39        self.reference_path_indexes.hash(state);
40        self.multiplicity.hash(state);
41        self.is_ref.hash(state);
42        self.single_sample_capacity.hash(state);
43    }
44}
45
46impl PartialEq for MultiSampleEdge {
47    fn eq(&self, other: &Self) -> bool {
48        self.reference_path_indexes == other.reference_path_indexes
49            && self.is_ref == other.is_ref
50            && self.multiplicity == other.multiplicity
51            && self.single_sample_capacity == other.single_sample_capacity
52    }
53}
54
55impl Eq for MultiSampleEdge {}
56
57impl MultiSampleEdge {
58    pub fn set(&mut self, is_ref: bool, multiplicity: usize, single_sample_capacity: usize) {
59        let mut single_sample_multiplicities = BinaryHeap::with_capacity(single_sample_capacity);
60        single_sample_multiplicities.push(Reverse(multiplicity));
61        self.multiplicity = multiplicity;
62        self.is_ref = is_ref;
63        self.single_sample_capacity = single_sample_capacity;
64        self.single_sample_multiplicities = single_sample_multiplicities;
65        self.current_single_sample_multiplicity = multiplicity;
66        self.reference_path_indexes = Vec::with_capacity(2);
67    }
68
69    /**
70     * update the single sample multiplicities by adding the current single sample multiplicity to the priority queue, and
71     * reset the current single sample multiplicity to 0.
72     */
73    pub fn flush_single_sample_multiplicity(&mut self) {
74        self.single_sample_multiplicities
75            .push(Reverse(self.current_single_sample_multiplicity));
76        if self.single_sample_multiplicities.len() == self.single_sample_capacity + 1 {
77            // remove the lowest multiplicity from the list
78            self.single_sample_multiplicities.pop();
79        } else if self.single_sample_multiplicities.len() > self.single_sample_capacity + 1 {
80            panic!(
81                "Somehow the per sample multiplicity list has grown too big: {:?}",
82                self.single_sample_multiplicities
83            );
84        }
85
86        self.current_single_sample_multiplicity = 0;
87    }
88
89    pub fn inc_multiplicity(&mut self, incr: usize) {
90        self.multiplicity += incr;
91        self.current_single_sample_multiplicity += incr;
92    }
93
94    pub fn get_pruning_multiplicity(&self) -> usize {
95        self.single_sample_multiplicities.peek().unwrap().0
96    }
97
98    pub fn add_reference_index(&mut self, i: usize) {
99        self.reference_path_indexes.push(i)
100    }
101
102    pub fn get_reference_path_indexes(&self) -> &Vec<usize> {
103        &self.reference_path_indexes
104    }
105
106    pub fn get_current_single_sample_multiplicity(&self) -> usize {
107        self.current_single_sample_multiplicity
108    }
109}
110
111impl BaseEdge for MultiSampleEdge {
112    /**
113     * Create a new MultiSampleEdge with weight multiplicity and, if isRef == true, indicates a path through the reference
114     *
115     * @param isRef indicates whether this edge is a path through the reference
116     * @param multiplicity the number of observations of this edge in this sample
117     * @param singleSampleCapacity the max number of samples to track edge multiplicities
118     */
119    fn new(is_ref: bool, multiplicity: usize, single_sample_capacity: usize) -> MultiSampleEdge {
120        let mut single_sample_multiplicities = BinaryHeap::with_capacity(single_sample_capacity);
121        single_sample_multiplicities.push(Reverse(multiplicity));
122
123        MultiSampleEdge {
124            multiplicity,
125            is_ref,
126            single_sample_multiplicities,
127            single_sample_capacity,
128            current_single_sample_multiplicity: multiplicity,
129            reference_path_indexes: Vec::with_capacity(2),
130        }
131    }
132
133    /**
134     * Get the number of observations of paths connecting two vertices
135     * @return a positive integer >= 0
136     */
137    fn get_multiplicity(&self) -> usize {
138        self.multiplicity
139    }
140
141    /**
142     * Get the DOT format label for this edge, to be displayed when printing this edge to a DOT file
143     * @return a non-null string
144     */
145    fn get_dot_label(&self) -> String {
146        return format!(
147            "{}/{}",
148            self.multiplicity.to_string(),
149            self.get_pruning_multiplicity()
150        );
151    }
152
153    /**
154     * Increase the multiplicity of this edge by incr
155     * @param incr the change in this multiplicity, must be >= 0
156     */
157    fn inc_multiplicity(&mut self, incr: usize) {
158        self.multiplicity += incr
159    }
160
161    /**
162     * A special assessor that returns the multiplicity that should be used by pruning algorithm
163     *
164     * @return the multiplicity value that should be used for pruning
165     */
166    fn get_pruning_multiplicity(&self) -> usize {
167        self.single_sample_multiplicities.peek().unwrap().0
168    }
169
170    /**
171     * Set the multiplicity of this edge to value
172     * @param value an integer >= 0
173     */
174    fn set_multiplicity(&mut self, value: usize) {
175        self.multiplicity = value
176    }
177
178    /**
179     * Does this edge indicate a path through the reference graph?
180     * @return true if so
181     */
182    fn is_ref(&self) -> bool {
183        return self.is_ref;
184    }
185
186    /**
187     * Indicate that this edge follows the reference sequence, or not
188     * @param isRef true if this is a reference edge
189     */
190    fn set_is_ref(&mut self, is_ref: bool) {
191        self.is_ref = is_ref
192    }
193
194    /**
195     * Add edge to this edge, updating isRef and multiplicity as appropriate
196     *
197     * isRef is simply the or of this and edge
198     * multiplicity is the sum
199     *
200     * @param edge the edge to add
201     * @return this
202     */
203    fn add(&mut self, edge: Self) {
204        self.multiplicity += edge.multiplicity;
205        self.is_ref = self.is_ref || edge.is_ref;
206    }
207
208    /**
209     * Create a new BaseEdge with the given multiplicity.
210     * The resulting edge is a reference edge if any of the argument edges are reference.
211     *
212     * @param edges a collection of edges to or their isRef values
213     * @param multiplicity our desired multiplicity
214     * @return a newly allocated BaseEdge
215     */
216    fn make_o_r_edge(edges: Vec<Self>, multiplicity: usize, single_sample_capacity: usize) -> Self {
217        assert!(!edges.is_empty(), "Edges cannot be empty");
218        let is_ref = edges.iter().any(|e| e.is_ref());
219
220        Self::new(is_ref, multiplicity, single_sample_capacity)
221    }
222
223    fn to_string(&self) -> String {
224        return format!(
225            "BaseEdge{{multiplicity={}, isRef={}}}",
226            self.multiplicity, self.is_ref
227        );
228    }
229}