sigalign_core/results/
deduplicate.rs

1use std::cmp::Ordering;
2
3use ahash::AHashSet;
4
5use super::{
6    QueryAlignment,
7    TargetAlignment,
8    Alignment,
9    AlignmentOperation, AlignmentPosition,
10};
11
12impl QueryAlignment {
13    /// Deduplicate the alignments by connected (Match or Subst) base pairs positions.
14    pub fn deduplicated(self) -> Self {
15        let mut paths = AHashSet::new();
16
17        Self(
18            self.0.into_iter().map(|v| {
19                v.deduplicated_with_paths_buffer(&mut paths)
20            }).collect()
21        )
22    }
23}
24
25impl TargetAlignment {
26    pub fn deduplicated(self) -> Self {
27        let mut paths = AHashSet::new();
28        self.deduplicated_with_paths_buffer(&mut paths)
29    }
30    fn deduplicated_with_paths_buffer(mut self, paths: &mut AHashSet<(u32, u32)>) -> Self {
31        paths.clear();
32
33        self.alignments.sort_unstable_by(|a, b| {
34            cmp_alignment_by_query_position(a, b)
35        });
36
37        let temporary_vec = std::mem::take(&mut self.alignments);
38        // - Same as
39        // let temporary_vec = std::mem::replace(
40        //     &mut self.alignments,
41        //     Vec::new(),
42        // );
43        // TODO: Which is better?
44        
45        self.alignments = temporary_vec.into_iter().filter(|v| {
46            let path = v.get_path();
47            if paths.is_disjoint(&path) {
48                paths.extend(path);
49                true
50            } else {
51                false
52            }
53        }).collect();
54        self
55    }
56}
57
58fn cmp_alignment_by_query_position(
59    a: &Alignment,
60    b: &Alignment,
61) -> Ordering {
62    b.position.get_query_length().cmp(&a.position.get_query_length())
63        .then(a.position.query.0.cmp(&b.position.query.0))
64}
65
66impl AlignmentPosition {
67    fn get_query_length(&self) -> u32 {
68        self.query.1 - self.query.0
69    }
70}
71
72impl Alignment {
73    fn get_path(&self) -> AHashSet<(u32, u32)> {
74        let (mut query_index, mut target_index) = {
75            let query_index = self.position.query.0;
76            let target_index = self.position.target.0;
77            (query_index, target_index)
78        };
79        let mut paths = AHashSet::new();
80        self.operations.iter().for_each(|operation| {
81            match operation.operation {
82                AlignmentOperation::Match | AlignmentOperation::Subst => {
83                    for _ in 0..operation.count {
84                        paths.insert((query_index, target_index));
85                        query_index += 1;
86                        target_index += 1;
87                    }
88                },
89                AlignmentOperation::Deletion => {
90                    target_index += operation.count;
91                },
92                AlignmentOperation::Insertion => {
93                    query_index += operation.count;
94                },
95            }
96        });
97        paths
98    }
99}