Skip to main content

pa_types/
lib.rs

1//! Common types for alignments:
2//!
3//! Sequence types:
4//! - [`Base`] : `u8`,
5//! - [`Sequence`] : `Vec<Base>`,
6//! - [`Seq`] : `&[Base]`.
7//!
8//! Affine cost models:
9//! - [`CostModel`],
10//! - [`ScoreModel`].
11//!
12//! Alignment path:
13//! - index [`I`] : `i32`,
14//! - position type [`Pos`] : `(I, I)` for (text/ref index, pattern/query index),
15//! - [`Path`] : `Vec<Pos>`.
16//!
17//! Cigar strings (see [`cigar`] module documentation):
18//! - single character [`CigarOp`] : match/sub/ins/del,
19//! - repeated 'unit' [`CigarElem`] : [`CigarOp`] with length,
20//! - cigar string [`Cigar`] : `Vec<CigarElem>`.
21pub mod cigar;
22pub mod cost;
23
24use std::cmp::Ordering;
25
26// Re-export types for convenience of `use pa_types::*;`.
27pub use cigar::*;
28pub use cost::*;
29
/// A single base: one character of a sequence, stored as a byte.
// NOTE: This is also part of rust-bio-types.
pub type Base = u8;

/// An owned sequence: a vector of [`Base`]s.
// NOTE: This is also part of rust-bio-types.
pub type Sequence = Vec<Base>;

/// A borrowed sequence: a slice of [`Base`]s.
pub type Seq<'a> = &'a [Base];
40
41/// Convert `seq` to a `String`.
42///
43/// Convenience wrapper around `String::from_utf8`.
44pub fn seq_to_string(seq: Seq) -> String {
45    String::from_utf8(seq.to_vec()).unwrap()
46}
47
/// A 0-based index into a sequence.
///
/// This is a signed 32-bit integer; it is also the coordinate type of [`Pos`].
pub type I = i32;
50
51/// A (text/ref, pattern/query) position in a pairwise alignment.
52///
53/// A global alignment starts at `(0,0)` and ends at `(n, m)`.
54#[derive(
55    Debug,
56    Clone,
57    Copy,
58    PartialEq,
59    Eq,
60    Hash,
61    Default,
62    derive_more::Add,
63    derive_more::Sub,
64    derive_more::AddAssign,
65    derive_more::SubAssign,
66)]
67pub struct Pos(pub I, pub I);
68
69impl std::fmt::Display for Pos {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        <Self as std::fmt::Debug>::fmt(self, f)
72    }
73}
74
75/// Partial ordering by
76/// (a,b) <= (c,d) when a<=c and b<=d.
77/// (a,b) < (c,d) when a<=c and b<=d and a<c or b<d.
78impl PartialOrd for Pos {
79    #[inline]
80    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
81        let a = self.0.cmp(&other.0);
82        let b = self.1.cmp(&other.1);
83        if a == b {
84            return Some(a);
85        }
86        if a == Ordering::Equal {
87            return Some(b);
88        }
89        if b == Ordering::Equal {
90            return Some(a);
91        }
92        None
93    }
94
95    #[inline]
96    fn le(&self, other: &Self) -> bool {
97        self.0 <= other.0 && self.1 <= other.1
98    }
99}
100
/// The path corresponding to an alignment of two sequences,
/// represented as a vector of [`Pos`].
pub type Path = Vec<Pos>;
103
104impl Pos {
105    /// The start of an alignment.
106    pub fn start() -> Self {
107        Pos(0, 0)
108    }
109
110    /// The target of an alignment.
111    pub fn target(a: Seq, b: Seq) -> Self {
112        Pos(a.len() as I, b.len() as I)
113    }
114
115    /// The diagonal of position `(i, j)` is `i-j`.
116    pub fn diag(&self) -> I {
117        self.0 - self.1
118    }
119
120    /// The anti diagonal of position `(i, j)` is `i+j`.
121    pub fn anti_diag(&self) -> I {
122        self.0 + self.1
123    }
124
125    /// Mirror this position: `(i, j) -> (j, i)`.
126    pub fn mirror(&self) -> Pos {
127        Pos(self.1, self.0)
128    }
129
130    /// Create a position from differently typed positions.
131    pub fn from<T>(i: T, j: T) -> Self
132    where
133        T: TryInto<I>,
134        <T as TryInto<i32>>::Error: std::fmt::Debug,
135    {
136        Pos(i.try_into().unwrap(), j.try_into().unwrap())
137    }
138}
139
140/// A small wrapper around Pos that implements Ord for lexicographic ordering.
141#[derive(Debug, Clone, Copy, PartialEq, Eq)]
142pub struct LexPos(pub Pos);
143
144impl PartialOrd for LexPos {
145    #[inline]
146    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
147        Some(self.cmp(other))
148    }
149
150    #[inline]
151    fn lt(&self, other: &Self) -> bool {
152        (self.0 .0, self.0 .1) < (other.0 .0, other.0 .1)
153    }
154}
155
156impl Ord for LexPos {
157    #[inline]
158    fn cmp(&self, other: &Self) -> Ordering {
159        (self.0 .0, self.0 .1).cmp(&(other.0 .0, other.0 .1))
160    }
161}
162
/// Generic global pairwise alignment interface.
///
/// Implementors must also implement [`std::fmt::Debug`].
pub trait Aligner: std::fmt::Debug {
    /// Align sequences `a` and `b`.
    ///
    /// Returns a pair of the *non-negative* cost of the alignment and an optional [`Cigar`].
    /// NOTE(review): the cigar is presumably `None` when traceback is disabled; confirm with implementors.
    ///
    /// The cost model and traceback parameters must be specified on construction of the aligner.
    fn align(&mut self, a: Seq, b: Seq) -> (Cost, Option<Cigar>);
}