1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
//! Needleman Wunsch global alignment with affine gaps.
//!
//! Mirrors [rust-bio](https://github.com/rust-bio/rust-bio) pairwise aligner logic
//! but operates on arbitrary types, not only on u8 as in rust-bio.
//! Made by ChatGPT 5.2.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlignmentOperation {
Match,
Subst,
Ins, // gap in y (consume x)
Del, // gap in x (consume y)
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alignment {
pub score: i32,
pub operations: Vec<AlignmentOperation>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Layer {
S,
I,
D,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TbS {
Start,
DiagMatch,
DiagSubst,
FromI,
FromD,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TbGap {
FromS, // opened a gap from S
FromGap, // extended an existing gap (I->I or D->D)
}
/// Global alignment (Needleman–Wunsch) with affine gap penalties:
/// - gap length k costs: gap_open + gap_extend * k
/// - scoring is generic via `score_fn(&T,&T) -> i32`
///
/// Complexity: O(m*n) time, O(m*n) memory (traceback stored).
pub fn global_align_affine<T, F>(
x: &[T],
y: &[T],
gap_open: i32,
gap_extend: i32,
score_fn: F,
) -> Alignment
where
T: PartialEq,
F: Fn(&T, &T) -> i32,
{
// A "negative infinity" that is safe to add to without overflowing.
// Using /4 gives headroom even if you add many penalties.
const MIN_SCORE: i32 = i32::MIN / 4;
#[inline]
fn idx(i: usize, j: usize, n: usize) -> usize {
// row-major, (m+1) x (n+1)
i * (n + 1) + j
}
#[inline]
fn add(a: i32, b: i32) -> i32 {
// If a is effectively -inf, keep it -inf.
// Otherwise saturating add avoids underflow/overflow surprises.
if a <= MIN_SCORE / 2 {
MIN_SCORE
} else {
a.saturating_add(b)
}
}
let m = x.len();
let n = y.len();
let size = (m + 1) * (n + 1);
let mut s = vec![MIN_SCORE; size];
let mut i_mat = vec![MIN_SCORE; size];
let mut d_mat = vec![MIN_SCORE; size];
let mut tb_s = vec![TbS::Start; size];
let mut tb_i = vec![TbGap::FromS; size];
let mut tb_d = vec![TbGap::FromS; size];
// (0,0)
s[idx(0, 0, n)] = 0;
tb_s[idx(0, 0, n)] = TbS::Start;
// Initialize first column (j = 0): must consume x via Ins operations (gap in y).
for ii in 1..=m {
let pos = idx(ii, 0, n);
i_mat[pos] = gap_open + gap_extend * (ii as i32); // gap length ii
tb_i[pos] = if ii == 1 { TbGap::FromS } else { TbGap::FromGap };
s[pos] = i_mat[pos];
tb_s[pos] = TbS::FromI;
// d_mat[pos] stays -inf
}
// Initialize first row (i = 0): must consume y via Del operations (gap in x).
for jj in 1..=n {
let pos = idx(0, jj, n);
d_mat[pos] = gap_open + gap_extend * (jj as i32); // gap length jj
tb_d[pos] = if jj == 1 { TbGap::FromS } else { TbGap::FromGap };
s[pos] = d_mat[pos];
tb_s[pos] = TbS::FromD;
// i_mat[pos] stays -inf
}
// Fill DP.
for ii in 1..=m {
for jj in 1..=n {
let pos = idx(ii, jj, n);
// I(ii,jj): x[ii-1] aligned to a gap (Ins) => come from (ii-1,jj)
let from_i = add(i_mat[idx(ii - 1, jj, n)], gap_extend);
let from_s_open = add(s[idx(ii - 1, jj, n)], gap_open + gap_extend);
// strict '>' like rust-bio: ties prefer opening from S
if from_i > from_s_open {
i_mat[pos] = from_i;
tb_i[pos] = TbGap::FromGap;
} else {
i_mat[pos] = from_s_open;
tb_i[pos] = TbGap::FromS;
}
// D(ii,jj): y[jj-1] aligned to a gap (Del) => come from (ii,jj-1)
let from_d = add(d_mat[idx(ii, jj - 1, n)], gap_extend);
let from_s_open = add(s[idx(ii, jj - 1, n)], gap_open + gap_extend);
if from_d > from_s_open {
d_mat[pos] = from_d;
tb_d[pos] = TbGap::FromGap;
} else {
d_mat[pos] = from_s_open;
tb_d[pos] = TbGap::FromS;
}
// Diagonal: match/subst
let diag = add(
s[idx(ii - 1, jj - 1, n)],
score_fn(&x[ii - 1], &y[jj - 1]),
);
// S(ii,jj) = max(diag, I(ii,jj), D(ii,jj)) with strict '>' priority:
// diag wins ties over I, and I wins ties over D (same comparison order as rust-bio).
let mut best = diag;
tb_s[pos] = if x[ii - 1] == y[jj - 1] {
TbS::DiagMatch
} else {
TbS::DiagSubst
};
if i_mat[pos] > best {
best = i_mat[pos];
tb_s[pos] = TbS::FromI;
}
if d_mat[pos] > best {
best = d_mat[pos];
tb_s[pos] = TbS::FromD;
}
s[pos] = best;
}
}
// Traceback from (m,n), starting in S layer (global).
let mut ops = Vec::with_capacity(m + n);
let mut ii = m;
let mut jj = n;
let mut layer = Layer::S;
while !(ii == 0 && jj == 0 && layer == Layer::S) {
match layer {
Layer::S => match tb_s[idx(ii, jj, n)] {
TbS::Start => break,
TbS::DiagMatch => {
ops.push(AlignmentOperation::Match);
ii -= 1;
jj -= 1;
layer = Layer::S;
}
TbS::DiagSubst => {
ops.push(AlignmentOperation::Subst);
ii -= 1;
jj -= 1;
layer = Layer::S;
}
TbS::FromI => {
layer = Layer::I;
}
TbS::FromD => {
layer = Layer::D;
}
},
Layer::I => {
// An insertion consumes one element of x (ii-1) while jj stays.
ops.push(AlignmentOperation::Ins);
let prev = tb_i[idx(ii, jj, n)];
ii -= 1;
layer = match prev {
TbGap::FromGap => Layer::I,
TbGap::FromS => Layer::S,
};
}
Layer::D => {
// A deletion consumes one element of y (jj-1) while ii stays.
ops.push(AlignmentOperation::Del);
let prev = tb_d[idx(ii, jj, n)];
jj -= 1;
layer = match prev {
TbGap::FromGap => Layer::D,
TbGap::FromS => Layer::S,
};
}
}
}
ops.reverse();
Alignment {
score: s[idx(m, n, n)],
operations: ops,
}
}