ff_structure/
pair_set.rs

1//! Pair and PairSet definitions. 
2//!
3//! Compact integer-based representation of base pairs, can 
4//! be used as alternative to PairTable representations.
5//!
6//! A `Pair` is defined by two 16-bit indices (`NAIDX`) packed into a
7//! 32-bit integer key (`P1KEY`) for efficient set and map storage.
8//!
9//! We currently do not povide the conversions from PairSet to 
10//! PairTable, mainly because at this stage it is not clear if
11//! PairSets may be used in the future to include pseudoknots. 
12//! 
13
14use std::fmt;
15use nohash_hasher::IntSet;
16
17use crate::PairTable;
18use crate::NAIDX;
19use crate::P1KEY;
20
21
22/// A base pair (i, j) with i < j.
23#[derive(Debug, Clone, Copy, PartialEq, Eq)]
24pub struct Pair {
25    i: NAIDX,
26    j: NAIDX,
27}
28
29impl Pair {
30    /// Create a new pair (i, j). Panics in debug if i >= j.
31    pub fn new(i: NAIDX, j: NAIDX) -> Self {
32        debug_assert!(i < j);
33        debug_assert!(j < NAIDX::MAX);
34        Pair { i, j }
35    }
36
37    /// Return the 5'-side index.
38    pub fn i(&self) -> NAIDX {
39        self.i
40    }
41
42    /// Return the 3'-side index.
43    pub fn j(&self) -> NAIDX {
44        self.j
45    }
46
47    /// Compact 32-bit key encoding both indices.
48    pub fn key(&self) -> P1KEY {
49        ((self.i as P1KEY) << 16) | (self.j as P1KEY)
50    }
51
52    /// Decode a key back into a `Pair`.
53    pub fn from_key(key: P1KEY) -> Self {
54        let i = (key >> 16) as NAIDX;
55        let j = (key & 0xFFFF) as NAIDX;
56        debug_assert!(i < j);
57        Pair { i, j }
58    }
59}
60
61/// A collection of base pairs represented as compact integer keys.
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct PairSet {
64    length: usize,
65    pairs: IntSet<P1KEY>,
66}
67
68impl PairSet {
69    /// Create an empty pair set for a given sequence length.
70    pub fn new(length: usize) -> Self {
71        Self {
72            length,
73            pairs: IntSet::default(),
74        }
75    }
76
77    /// Number of pairs contained in the set.
78    pub fn len(&self) -> usize {
79        self.pairs.len()
80    }
81
82    /// Returns true if there are no pairs.
83    pub fn is_empty(&self) -> bool {
84        self.pairs.is_empty()
85    }
86
87    /// Insert a new pair; returns true if it was newly inserted.
88    pub fn insert(&mut self, pair: Pair) -> bool {
89        debug_assert!((pair.j() as usize) < self.length);
90        self.pairs.insert(pair.key())
91    }
92
93    /// Check if a pair exists in the set.
94    pub fn contains(&self, pair: &Pair) -> bool {
95        self.pairs.contains(&pair.key())
96    }
97
98    /// Iterator over all pairs in arbitrary order.
99    pub fn iter(&self) -> impl Iterator<Item = Pair> + '_ {
100        self.pairs.iter().map(|&k| Pair::from_key(k))
101    }
102
103    /// Return all pairs as a Vec (for deterministic inspection).
104    pub fn to_vec(&self) -> Vec<Pair> {
105        let mut v: Vec<_> = self.iter().collect();
106        v.sort_unstable_by_key(|p| (p.i(), p.j()));
107        v
108    }
109
110    /// Underlying sequence length (from the originating `PairTable`).
111    pub fn length(&self) -> usize {
112        self.length
113    }
114}
115
116impl From<&PairTable> for PairSet {
117    fn from(pt: &PairTable) -> Self {
118        let mut pairs = IntSet::default();
119        for (i, &j_opt) in pt.iter().enumerate() {
120            let i = i as NAIDX;
121            if let Some(j) = j_opt {
122                if i < j {
123                    pairs.insert(Pair::new(i, j).key());
124                }
125            }
126        }
127        Self {
128            length: pt.len(),
129            pairs,
130        }
131    }
132}
133
134impl fmt::Display for PairSet {
135    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
136        let mut first = true;
137        for pair in self.to_vec() {
138            if !first {
139                write!(f, ",")?;
140            }
141            // Only here we show 1-based values for readability.
142            write!(f, "({},{})", pair.i(), pair.j())?;
143            first = false;
144        }
145        Ok(())
146    }
147}
148
149#[cfg(test)]
150mod tests {
151    use super::*;
152
153    #[test]
154    fn test_pair_key_roundtrip() {
155        let p = Pair::new(1, 42);
156        let k = p.key();
157        let q = Pair::from_key(k);
158        assert_eq!(p, q);
159    }
160
161    #[test]
162    fn test_pair_list_from_pair_table() {
163        let pt = PairTable::try_from("((..))").unwrap();
164        let pl = PairSet::from(&pt);
165
166        let expected = vec![Pair::new(0, 5), Pair::new(1, 4)];
167        assert_eq!(pl.length(), 6);
168        assert_eq!(pl.to_vec(), expected);
169
170        for p in &expected {
171            assert!(pl.contains(p));
172        }
173        assert!(!pl.contains(&Pair::new(0, 4)));
174    }
175
176    #[test]
177    fn test_display() {
178        let pt = PairTable::try_from("((..))").unwrap();
179        let pl = PairSet::from(&pt);
180        let s = format!("{}", pl);
181        assert!(s.contains("(0,5)"));
182        assert!(s.contains("(1,4)"));
183    }
184}
185