1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
use crate::error::*;
use crate::structs::*;
use std::collections::HashMap;
/// Validate the SEQRES data found, if there is any
#[allow(
clippy::comparison_chain,
clippy::cast_possible_wrap,
clippy::cast_sign_loss
)]
pub fn validate_seqres(
pdb: &mut PDB,
sequence: HashMap<String, Vec<(usize, usize, Vec<String>)>>,
lines: Vec<String>,
start_linenumber: usize,
context: &Context,
) -> Vec<PDBError> {
let mut errors = Vec::new();
for (chain_id, data) in sequence {
if let Some(chain) = pdb.chains_mut().find(|c| c.id() == chain_id) {
let mut chain_sequence = Vec::new();
let mut serial = 1;
let mut residues = 0;
for (line, (ser_num, res_num, seq)) in data.into_iter().enumerate() {
if serial != ser_num {
errors.push(PDBError::new(
ErrorLevel::StrictWarning,
"SEQRES serial number invalid",
format!("The serial number for SEQRES chain \"{chain_id}\" with number \"{ser_num}\" does not follow sequentially from the previous row."),
context.clone()
));
}
serial += 1;
if residues == 0 {
residues = res_num;
} else if residues != res_num {
errors.push(PDBError::new(
ErrorLevel::StrictWarning,
"SEQRES residue total invalid",
format!("The residue total for SEQRES chain \"{chain_id}\" with number \"{ser_num}\" does not match the total on the first row for this chain."),
context.clone()
));
}
chain_sequence.extend(
seq.into_iter()
.enumerate()
.map(|(column, item)| (item, (line, column))),
);
}
if chain_sequence.len() != residues {
errors.push(PDBError::new(
ErrorLevel::LooseWarning,
"SEQRES residue total invalid",
format!("The residue total for SEQRES chain \"{chain_id}\" does not match the total residues found in the seqres records."),
context.clone()
));
}
let mut offset = 0;
if let Some(db_ref) = chain.database_reference() {
offset = db_ref.pdb_position.start;
for dif in &db_ref.differences {
if dif.database_residue.is_none() && dif.residue.1 < db_ref.pdb_position.start {
// If there is a residue in front of the db sequence
offset -= 1;
}
}
if db_ref.pdb_position.end - offset + 1 != residues as isize {
errors.push(PDBError::new(
ErrorLevel::LooseWarning,
"SEQRES residue total invalid",
format!("The residue total ({}) for SEQRES chain \"{}\" does not match the total residues found in the dbref record ({}).", residues, chain_id, db_ref.pdb_position.end - offset + 1),
context.clone()
));
}
}
let copy = chain.clone();
let mut chain_res = copy.residues();
let mut next = chain_res.next();
// Contain all inconsistencies between the SEQRES and found residues, group all by `chain_id` and keep a list of index, seqres item position, and chain_sequence.
let mut seqres_inconsistent = Vec::new();
for (raw_index, (seq, position)) in chain_sequence.iter().enumerate() {
let index = raw_index as isize + offset;
if let Some(n) = next {
if index == n.serial_number() {
if let Some(name) = n.name() {
if *seq != name {
#[allow(clippy::type_complexity)]
if let Some(item) = seqres_inconsistent.iter_mut().find(
|item: &&mut (&str, Vec<(isize, (usize, usize), &str)>)| {
item.0 == chain_id
},
) {
item.1.push((index, *position, name));
} else {
seqres_inconsistent
.push((&chain_id, vec![(index, *position, name)]));
}
}
} else {
errors.push(PDBError::new(
ErrorLevel::StrictWarning,
"Multiple residues in SEQRES validation",
format!("The residue index {index} in chain {chain_id} has no conformers or multiple with different names. The program cannot validate the SEQRES record in this way."),
context.clone()
)); // TODO: show found residues
}
next = chain_res.next();
} else if index < n.serial_number() {
chain.add_residue(
Residue::new(
index,
None,
Some(
Conformer::new(seq, None, None)
.expect("Invalid characters in Conformer generation"),
),
)
.expect("Invalid characters in Residue generation"),
);
chain.sort();
} else {
errors.push(PDBError::new(
ErrorLevel::LooseWarning,
"Chain residue invalid",
format!("The residue index {} value \"{:?}\" for Chain \"{}\" is not sequentially increasing, value expected: {}.", n.serial_number(), n.name(), chain_id, index),
context.clone()
));
#[allow(clippy::while_let_on_iterator)]
while let Some(n) = chain_res.next() {
if n.serial_number() == index {
next = chain_res.next();
break;
}
}
}
} else {
chain.add_residue(
Residue::new(
index,
None,
Some(
Conformer::new(seq, None, None)
.expect("Invalid characters in Conformer generation"),
),
)
.expect("Invalid characters in Residue generation"),
);
chain.sort();
}
}
if !seqres_inconsistent.is_empty() {
for (_chain, inconsistencies) in seqres_inconsistent {
// Find the extremes of the used lines from all SEQRES lines, assuming that all lines are continuous.
let used_lines = inconsistencies
.iter()
.fold((usize::MAX, usize::MIN), |a, v| {
(v.1 .0.min(a.0), v.1 .0.max(a.1))
});
let context_lines = &lines[used_lines.0..=used_lines.1];
let mut highlights = Vec::new();
let mut found_residues = Vec::new();
// Create all highlights in the original SEQRES definition.
for inconsistency in inconsistencies {
let line_offset = inconsistency.1 .0 - used_lines.0;
highlights.push((line_offset, 19 + 4 * inconsistency.1 .1, 3)); // Calculate the correct character position for the highlight (is index right now)
// Add the found residue to the list of found residues, in the correct line
if let Some(line) = found_residues
.iter_mut()
.find(|v: &&mut (usize, Vec<(usize, &str)>)| v.0 == line_offset)
{
line.1.push((inconsistency.1 .1, inconsistency.2));
} else {
found_residues
.push((line_offset, vec![(inconsistency.1 .1, inconsistency.2)]));
}
}
// Add all found residues into lines matching the SEQRES definition
let found_residues = found_residues
.iter()
.map(|line| {
line.1
.iter()
.enumerate()
.fold(("".to_string(), 0), |acc, v| {
(
acc.0
+ if v.1 .0.saturating_sub(1) == acc.1 || v.0 == 0 {
" "
} else {
" ... "
}
+ v.1 .1,
v.1 .0,
)
})
.0
})
.collect();
// Create the final error message. See example:
// LooseWarning: SEQRES inconsistent residues
//
// |
// 1 | SEQRES 1 B 475 GLY PRO ASN ILE CYS THR THR ARG GLY VAL SER SER CYS (SEQRES definition)
// | ^^^ ^^^ ^^^ ^^^
// |
//
// |
// 1 | HOH HOH HOH HOH (found residues)
// |
// The residues as defined in the SEQRES records do not match with the found residues, see above for details.
errors.push(PDBError::new(
ErrorLevel::LooseWarning,
"SEQRES inconsistent residues",
"The residues as defined in the SEQRES records do not match with the found residues, see above for details.",
Context::Multiple { contexts: vec![
(Some("SEQRES definition".to_string()), Context::RangeHighlights{start_linenumber, lines: context_lines.to_vec(), highlights}),
(Some("Residues found in ATOM definitions".to_string()), Context::RangeHighlights { start_linenumber: 0, lines: found_residues, highlights: Vec::new() })]}
));
}
}
let total_found = chain
.residues()
.filter(|r| !r.atoms().any(Atom::hetero)) // TODO: It filters out all residues with at least one HETATM, this should be changed to include in the total residues defined in HETNAM.
.count();
if chain_sequence.len() != total_found {
errors.push(PDBError::new(
ErrorLevel::LooseWarning,
"SEQRES residue total invalid",
format!("The residue total ({}) for SEQRES chain \"{}\" does not match the total residues found in the chain ({}).", chain_sequence.len(), chain_id, total_found),
context.clone()
));
}
}
}
errors
}