1use crate::db;
8use crate::model::{
9 atom::Atom,
10 grid::Grid,
11 residue::Residue,
12 structure::Structure,
13 types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
14};
15use crate::ops::error::Error;
16use crate::utils::parallel::*;
17use nalgebra::{Matrix3, Rotation3, Vector3};
18use rand::Rng;
19use std::collections::HashSet;
20
21const DISULFIDE_SG_THRESHOLD: f64 = 2.2;
23const N_TERM_PKA: f64 = 8.0;
25const C_TERM_PKA: f64 = 3.1;
27const PHOSPHATE_PKA2: f64 = 6.5;
29
30#[derive(Debug, Clone)]
35pub struct HydroConfig {
36 pub target_ph: Option<f64>,
38 pub remove_existing_h: bool,
40 pub his_strategy: HisStrategy,
42}
43
44impl Default for HydroConfig {
45 fn default() -> Self {
48 Self {
49 target_ph: None,
50 remove_existing_h: true,
51 his_strategy: HisStrategy::HbNetwork,
52 }
53 }
54}
55
56#[derive(Debug, Clone, Copy, PartialEq, Eq)]
58pub enum HisStrategy {
59 DirectHID,
61 DirectHIE,
63 Random,
65 HbNetwork,
67}
68
69pub fn add_hydrogens(structure: &mut Structure, config: &HydroConfig) -> Result<(), Error> {
89 mark_disulfide_bridges(structure);
90
91 let acceptor_grid = if config.his_strategy == HisStrategy::HbNetwork {
92 Some(build_acceptor_grid(structure))
93 } else {
94 None
95 };
96
97 structure
98 .par_chains_mut()
99 .enumerate()
100 .try_for_each(|(c_idx, chain)| {
101 chain
102 .par_residues_mut()
103 .enumerate()
104 .try_for_each(|(r_idx, residue)| {
105 if residue.category != ResidueCategory::Standard {
106 return Ok(());
107 }
108
109 let new_name = determine_protonation_state(
110 residue,
111 config,
112 acceptor_grid.as_ref(),
113 Some((c_idx, r_idx)),
114 );
115
116 if let Some(name) = new_name {
117 residue.name = name.into();
118 }
119
120 if config.remove_existing_h {
121 residue.strip_hydrogens();
122 }
123
124 construct_hydrogens_for_residue(residue, config)
125 })
126 })
127}
128
129fn build_acceptor_grid(structure: &Structure) -> Grid<(usize, usize)> {
139 let atoms: Vec<(Point, (usize, usize))> = structure
140 .iter_chains()
141 .enumerate()
142 .flat_map(|(c_idx, chain)| {
143 chain
144 .iter_residues()
145 .enumerate()
146 .flat_map(move |(r_idx, residue)| {
147 residue
148 .iter_atoms()
149 .filter(|a| matches!(a.element, Element::N | Element::O | Element::F))
150 .map(move |a| (a.pos, (c_idx, r_idx)))
151 })
152 })
153 .collect();
154 Grid::new(atoms, 3.5)
155}
156
157fn determine_protonation_state(
173 residue: &Residue,
174 config: &HydroConfig,
175 grid: Option<&Grid<(usize, usize)>>,
176 indices: Option<(usize, usize)>,
177) -> Option<String> {
178 let std = residue.standard_name?;
179
180 if std == StandardResidue::CYS && residue.name == "CYX" {
181 return None;
182 }
183
184 if let Some(ph) = config.target_ph {
185 return match std {
186 StandardResidue::ASP => Some(if ph < 3.9 {
187 "ASH".to_string()
188 } else {
189 "ASP".to_string()
190 }),
191 StandardResidue::GLU => Some(if ph < 4.2 {
192 "GLH".to_string()
193 } else {
194 "GLU".to_string()
195 }),
196 StandardResidue::LYS => Some(if ph > 10.5 {
197 "LYN".to_string()
198 } else {
199 "LYS".to_string()
200 }),
201 StandardResidue::ARG => Some(if ph > 12.5 {
202 "ARN".to_string()
203 } else {
204 "ARG".to_string()
205 }),
206 StandardResidue::CYS => Some(if ph > 8.3 {
207 "CYM".to_string()
208 } else {
209 "CYS".to_string()
210 }),
211 StandardResidue::TYR => Some(if ph > 10.0 {
212 "TYM".to_string()
213 } else {
214 "TYR".to_string()
215 }),
216 StandardResidue::HIS => {
217 if ph < 6.0 {
218 Some("HIP".to_string())
219 } else {
220 Some(select_neutral_his(
221 residue,
222 config.his_strategy,
223 grid,
224 indices,
225 ))
226 }
227 }
228 _ => None,
229 };
230 }
231
232 if std == StandardResidue::HIS && matches!(residue.name.as_str(), "HIS" | "HID" | "HIE" | "HIP")
233 {
234 return Some(select_neutral_his(
235 residue,
236 config.his_strategy,
237 grid,
238 indices,
239 ));
240 }
241
242 None
243}
244
245fn mark_disulfide_bridges(structure: &mut Structure) {
251 let cys_sulfurs: Vec<(Point, (usize, usize))> = structure
252 .par_chains_mut()
253 .enumerate()
254 .flat_map(|(c_idx, chain)| {
255 chain
256 .par_residues_mut()
257 .enumerate()
258 .filter_map(move |(r_idx, residue)| {
259 if matches!(residue.standard_name, Some(StandardResidue::CYS)) {
260 residue.atom("SG").map(|sg| (sg.pos, (c_idx, r_idx)))
261 } else {
262 None
263 }
264 })
265 })
266 .collect();
267
268 if cys_sulfurs.len() < 2 {
269 return;
270 }
271
272 let grid = Grid::new(cys_sulfurs.clone(), DISULFIDE_SG_THRESHOLD + 0.5);
273
274 let disulfide_residues: HashSet<(usize, usize)> = cys_sulfurs
275 .par_iter()
276 .flat_map_iter(|(pos, (c_idx, r_idx))| {
277 grid.neighbors(pos, DISULFIDE_SG_THRESHOLD)
278 .exact()
279 .filter_map(move |(_, &(neighbor_c, neighbor_r))| {
280 if *c_idx == neighbor_c && *r_idx == neighbor_r {
281 None
282 } else {
283 Some([(*c_idx, *r_idx), (neighbor_c, neighbor_r)])
284 }
285 })
286 })
287 .flatten()
288 .collect();
289
290 if disulfide_residues.is_empty() {
291 return;
292 }
293
294 structure
295 .par_chains_mut()
296 .enumerate()
297 .for_each(|(c_idx, chain)| {
298 chain
299 .par_residues_mut()
300 .enumerate()
301 .for_each(|(r_idx, residue)| {
302 if disulfide_residues.contains(&(c_idx, r_idx)) && residue.name != "CYX" {
303 residue.name = "CYX".into();
304 }
305 });
306 });
307}
308
309fn select_neutral_his(
322 residue: &Residue,
323 strategy: HisStrategy,
324 grid: Option<&Grid<(usize, usize)>>,
325 indices: Option<(usize, usize)>,
326) -> String {
327 match strategy {
328 HisStrategy::DirectHID => "HID".to_string(),
329 HisStrategy::DirectHIE => "HIE".to_string(),
330 HisStrategy::Random => {
331 let mut rng = rand::rng();
332 if rng.random_bool(0.5) {
333 "HID".to_string()
334 } else {
335 "HIE".to_string()
336 }
337 }
338 HisStrategy::HbNetwork => optimize_his_network(residue, grid, indices),
339 }
340}
341
342fn optimize_his_network(
354 residue: &Residue,
355 grid: Option<&Grid<(usize, usize)>>,
356 indices: Option<(usize, usize)>,
357) -> String {
358 let (grid, self_indices) = match (grid, indices) {
359 (Some(g), Some(i)) => (g, i),
360 _ => return "HIE".to_string(),
361 };
362
363 let score_hid = calculate_h_bond_score(residue, "ND1", "CG", "CE1", grid, self_indices);
364
365 let score_hie = calculate_h_bond_score(residue, "NE2", "CD2", "CE1", grid, self_indices);
366
367 if score_hid > score_hie {
368 "HID".to_string()
369 } else {
370 "HIE".to_string()
371 }
372}
373
374fn calculate_h_bond_score(
393 residue: &Residue,
394 n_name: &str,
395 c1_name: &str,
396 c2_name: &str,
397 grid: &Grid<(usize, usize)>,
398 self_idx: (usize, usize),
399) -> f64 {
400 let n = match residue.atom(n_name) {
401 Some(a) => a,
402 None => return 0.0,
403 };
404 let c1 = match residue.atom(c1_name) {
405 Some(a) => a,
406 None => return 0.0,
407 };
408 let c2 = match residue.atom(c2_name) {
409 Some(a) => a,
410 None => return 0.0,
411 };
412
413 let v1 = (c1.pos - n.pos).normalize();
414 let v2 = (c2.pos - n.pos).normalize();
415 let bisector = (v1 + v2).normalize();
416 let h_dir = -bisector;
417 let h_pos = n.pos + h_dir;
418
419 let mut score = 0.0;
420
421 for (a_pos, &idx) in grid.neighbors(&n.pos, 3.5).exact() {
422 if idx == self_idx {
423 continue;
424 }
425
426 let h_a_vec = a_pos - h_pos;
427 let dist_sq = h_a_vec.norm_squared();
428
429 if dist_sq > 2.7 * 2.7 {
430 continue;
431 }
432
433 let h_a_dir = h_a_vec.normalize();
434 let cos_theta = h_dir.dot(&h_a_dir);
435
436 if cos_theta > 0.0 {
437 score += (1.0 / dist_sq) * (cos_theta * cos_theta);
438 }
439 }
440
441 score
442}
443
444fn construct_hydrogens_for_residue(
460 residue: &mut Residue,
461 config: &HydroConfig,
462) -> Result<(), Error> {
463 let template_name = residue.name.clone();
464
465 let template_view =
466 db::get_template(&template_name).ok_or_else(|| Error::MissingInternalTemplate {
467 res_name: template_name.to_string(),
468 })?;
469
470 let existing_atoms: HashSet<String> =
471 residue.atoms().iter().map(|a| a.name.to_string()).collect();
472
473 let rotation_override = if residue.standard_name == Some(StandardResidue::HOH) {
474 let mut rng = rand::rng();
475 Some(
476 Rotation3::from_axis_angle(
477 &Vector3::y_axis(),
478 rng.random_range(0.0..std::f64::consts::TAU),
479 ) * Rotation3::from_axis_angle(
480 &Vector3::x_axis(),
481 rng.random_range(0.0..std::f64::consts::TAU),
482 ),
483 )
484 } else {
485 None
486 };
487
488 for (h_name, h_tmpl_pos, anchors_iter) in template_view.hydrogens() {
489 if existing_atoms.contains(h_name) {
490 continue;
491 }
492
493 let anchors: Vec<&str> = anchors_iter.collect();
494 if let Ok(pos) = reconstruct_geometry(residue, h_tmpl_pos, &anchors, rotation_override) {
495 residue.add_atom(Atom::new(h_name, Element::H, pos));
496 } else {
497 return Err(Error::incomplete_for_hydro(
498 &*residue.name,
499 residue.id,
500 anchors.first().copied().unwrap_or("?"),
501 ));
502 }
503 }
504
505 match residue.position {
506 ResiduePosition::NTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
507 construct_n_term_hydrogens(residue, n_term_should_be_protonated(config))?;
508 }
509 ResiduePosition::CTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
510 construct_c_term_hydrogen(residue, c_term_should_be_protonated(config))?;
511 }
512 ResiduePosition::ThreePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
513 construct_3_prime_hydrogen(residue)?;
514 }
515 ResiduePosition::FivePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
516 if residue.has_atom("P") {
517 construct_5_prime_phosphate_hydrogens(residue, config)?;
518 } else if residue.has_atom("O5'") {
519 construct_5_prime_hydrogen(residue)?;
520 }
521 }
522 _ => {}
523 }
524
525 Ok(())
526}
527
528fn n_term_should_be_protonated(config: &HydroConfig) -> bool {
538 config.target_ph.map(|ph| ph < N_TERM_PKA).unwrap_or(true)
539}
540
541fn c_term_should_be_protonated(config: &HydroConfig) -> bool {
551 config.target_ph.map(|ph| ph < C_TERM_PKA).unwrap_or(false)
552}
553
554fn reconstruct_geometry(
567 residue: &Residue,
568 target_tmpl_pos: Point,
569 anchor_names: &[&str],
570 rotation_override: Option<Rotation3<f64>>,
571) -> Result<Point, ()> {
572 let template_view = db::get_template(&residue.name).ok_or(())?;
573
574 let mut residue_pts = Vec::new();
575 let mut template_pts = Vec::new();
576
577 for name in anchor_names {
578 let r_atom = residue.atom(name).ok_or(())?;
579 residue_pts.push(r_atom.pos);
580
581 let t_pos = template_view
582 .heavy_atoms()
583 .find(|(n, _, _)| n == name)
584 .map(|(_, _, p)| p)
585 .ok_or(())?;
586 template_pts.push(t_pos);
587 }
588
589 let (mut rot, mut trans) = calculate_transform(&residue_pts, &template_pts).ok_or(())?;
590
591 if let (Some(override_rot), 1) = (rotation_override, anchor_names.len()) {
592 rot = override_rot.into_inner();
593 trans = residue_pts[0].coords - rot * template_pts[0].coords;
594 }
595
596 Ok(rot * target_tmpl_pos + trans)
597}
598
599fn construct_n_term_hydrogens(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
614 residue.remove_atom("H");
615 residue.remove_atom("H1");
616 residue.remove_atom("H2");
617 residue.remove_atom("H3");
618
619 let n_pos = residue
620 .atom("N")
621 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "N"))?
622 .pos;
623 let ca_pos = residue
624 .atom("CA")
625 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "CA"))?
626 .pos;
627
628 let v_ca_n = (n_pos - ca_pos).normalize();
629 let bond_len = 1.0;
630 let angle = 109.5_f64.to_radians();
631 let sin_a = angle.sin();
632 let cos_a = angle.cos();
633
634 let up = if v_ca_n.x.abs() < 0.9 {
635 Vector3::x()
636 } else {
637 Vector3::y()
638 };
639 let v_perp = v_ca_n.cross(&up).normalize();
640 let base_dir = v_ca_n.scale(cos_a) + v_perp.scale(sin_a);
641
642 let phases = [0.0_f64, 120.0, 240.0];
643 let mut candidates: Vec<Vector3<f64>> = phases
644 .iter()
645 .map(|deg| {
646 let rot_axis = Rotation3::from_axis_angle(
647 &nalgebra::Unit::new_normalize(v_ca_n),
648 deg.to_radians(),
649 );
650 rot_axis * base_dir
651 })
652 .collect();
653
654 candidates.sort_by(|a, b| {
655 a.dot(&v_ca_n)
656 .partial_cmp(&b.dot(&v_ca_n))
657 .unwrap_or(std::cmp::Ordering::Equal)
658 });
659
660 let target_count = if protonated { 3 } else { 2 };
661 let names = ["H1", "H2", "H3"];
662 for (idx, dir) in candidates.into_iter().take(target_count).enumerate() {
663 residue.add_atom(Atom::new(
664 names[idx],
665 Element::H,
666 n_pos + dir.scale(bond_len),
667 ));
668 }
669
670 Ok(())
671}
672
673fn construct_c_term_hydrogen(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
688 if !protonated {
689 residue.remove_atom("HOXT");
690 residue.remove_atom("HXT");
691 return Ok(());
692 }
693
694 residue.remove_atom("HXT");
695 if residue.has_atom("HOXT") {
696 return Ok(());
697 }
698
699 let c_pos = residue
700 .atom("C")
701 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C"))?
702 .pos;
703 let oxt_pos = residue
704 .atom("OXT")
705 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "OXT"))?
706 .pos;
707
708 let direction = oxt_pos - c_pos;
709 if direction.norm_squared() < 1e-6 {
710 return Err(Error::incomplete_for_hydro(
711 &*residue.name,
712 residue.id,
713 "OXT",
714 ));
715 }
716 let dir = direction.normalize();
717 let h_pos = oxt_pos + dir.scale(0.97);
718
719 residue.add_atom(Atom::new("HOXT", Element::H, h_pos));
720 Ok(())
721}
722
723fn construct_3_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
737 if residue.has_atom("HO3'") {
738 return Ok(());
739 }
740
741 let o3 = residue
742 .atom("O3'")
743 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "O3'"))?
744 .pos;
745 let c3 = residue
746 .atom("C3'")
747 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C3'"))?
748 .pos;
749 let c4 = residue
750 .atom("C4'")
751 .or_else(|| residue.atom("C2'"))
752 .map(|a| a.pos)
753 .unwrap_or_else(|| c3 + Vector3::x());
754
755 let v_c3_o3 = (o3 - c3).normalize();
756
757 let v_c4_c3 = (c3 - c4).normalize();
758 let normal = v_c3_o3.cross(&v_c4_c3).normalize();
759
760 let h_dir = (v_c3_o3 + normal).normalize();
761 let h_pos = o3 + h_dir.scale(0.96);
762
763 residue.add_atom(Atom::new("HO3'", Element::H, h_pos));
764 Ok(())
765}
766
767fn construct_5_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
781 if residue.has_atom("HO5'") {
782 return Ok(());
783 }
784
785 let o5 = residue
786 .atom("O5'")
787 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "O5'"))?
788 .pos;
789 let c5 = residue
790 .atom("C5'")
791 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C5'"))?
792 .pos;
793
794 let v_c5_o5 = (o5 - c5).normalize();
795 let aux = if v_c5_o5.z.abs() < 0.9 {
796 Vector3::z()
797 } else {
798 Vector3::x()
799 };
800 let perp = v_c5_o5.cross(&aux).normalize();
801
802 let h_dir = (v_c5_o5 + perp).normalize();
803 let h_pos = o5 + h_dir.scale(0.96);
804
805 residue.add_atom(Atom::new("HO5'", Element::H, h_pos));
806 Ok(())
807}
808
809fn construct_5_prime_phosphate_hydrogens(
828 residue: &mut Residue,
829 config: &HydroConfig,
830) -> Result<(), Error> {
831 let ph = config.target_ph.unwrap_or(7.4);
832
833 if ph >= PHOSPHATE_PKA2 {
834 residue.remove_atom("HOP3");
835 residue.remove_atom("HOP2");
836 return Ok(());
837 }
838
839 if residue.has_atom("HOP3") {
840 return Ok(());
841 }
842
843 let op3 = residue
844 .atom("OP3")
845 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "OP3"))?
846 .pos;
847 let p = residue
848 .atom("P")
849 .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "P"))?
850 .pos;
851
852 let direction = (op3 - p).normalize();
853 let h_pos = op3 + direction * 0.96;
854
855 residue.add_atom(Atom::new("HOP3", Element::H, h_pos));
856 Ok(())
857}
858
859fn calculate_transform(r_pts: &[Point], t_pts: &[Point]) -> Option<(Matrix3<f64>, Vector3<f64>)> {
872 let n = r_pts.len();
873 if n != t_pts.len() || n == 0 {
874 return None;
875 }
876
877 let r_center = r_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
878 let t_center = t_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
879
880 if n == 1 {
881 return Some((Matrix3::identity(), r_center - t_center));
882 }
883
884 if n == 2 {
885 let v_r = r_pts[1] - r_pts[0];
886 let v_t = t_pts[1] - t_pts[0];
887 let rot = Rotation3::rotation_between(&v_t, &v_r).unwrap_or_else(Rotation3::identity);
888 let trans = r_center - rot * t_center;
889 return Some((rot.into_inner(), trans));
890 }
891
892 let mut cov = Matrix3::zeros();
893 for (p_r, p_t) in r_pts.iter().zip(t_pts.iter()) {
894 let v_r = p_r.coords - r_center;
895 let v_t = p_t.coords - t_center;
896 cov += v_r * v_t.transpose();
897 }
898
899 let svd = cov.svd(true, true);
900 let u = svd.u?;
901 let v_t = svd.v_t?;
902
903 let mut rot = u * v_t;
904 if rot.determinant() < 0.0 {
905 let mut corr = Matrix3::identity();
906 corr[(2, 2)] = -1.0;
907 rot = u * corr * v_t;
908 }
909
910 let trans = r_center - rot * t_center;
911 Some((rot, trans))
912}
913
914#[cfg(test)]
915mod tests {
916 use super::*;
917 use crate::model::{
918 atom::Atom,
919 chain::Chain,
920 residue::Residue,
921 types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
922 };
923
924 fn residue_from_template(name: &str, std: StandardResidue, id: i32) -> Residue {
925 let template = db::get_template(name).unwrap_or_else(|| panic!("missing template {name}"));
926 let mut residue = Residue::new(id, None, name, Some(std), ResidueCategory::Standard);
927 residue.position = ResiduePosition::Internal;
928 for (atom_name, element, pos) in template.heavy_atoms() {
929 residue.add_atom(Atom::new(atom_name, element, pos));
930 }
931 residue
932 }
933
934 fn structure_with_residue(residue: Residue) -> Structure {
935 let mut chain = Chain::new("A");
936 chain.add_residue(residue);
937 let mut structure = Structure::new();
938 structure.add_chain(chain);
939 structure
940 }
941
942 fn structure_with_residues(residues: Vec<Residue>) -> Structure {
943 let mut chain = Chain::new("A");
944 for residue in residues {
945 chain.add_residue(residue);
946 }
947 let mut structure = Structure::new();
948 structure.add_chain(chain);
949 structure
950 }
951
952 fn n_terminal_residue(id: i32) -> Residue {
953 let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
954 residue.position = ResiduePosition::NTerminal;
955 residue
956 }
957
958 fn c_terminal_residue(id: i32) -> Residue {
959 let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
960 residue.position = ResiduePosition::CTerminal;
961 let c_pos = residue.atom("C").expect("C atom").pos;
962 let o_pos = residue.atom("O").expect("O atom").pos;
963 let offset = c_pos - o_pos;
964 let oxt_pos = c_pos + offset;
965 residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
966 residue
967 }
968
969 fn five_prime_residue_with_phosphate(id: i32) -> Residue {
970 let template = db::get_template("DA").unwrap();
971 let mut residue = Residue::new(
972 id,
973 None,
974 "DA",
975 Some(StandardResidue::DA),
976 ResidueCategory::Standard,
977 );
978 residue.position = ResiduePosition::FivePrime;
979 for (atom_name, element, pos) in template.heavy_atoms() {
980 residue.add_atom(Atom::new(atom_name, element, pos));
981 }
982 let p_pos = residue.atom("P").unwrap().pos;
983 let op1_pos = residue.atom("OP1").unwrap().pos;
984 let op2_pos = residue.atom("OP2").unwrap().pos;
985 let o5_pos = residue.atom("O5'").unwrap().pos;
986 let centroid = (op1_pos.coords + op2_pos.coords + o5_pos.coords) / 3.0;
987 let direction = (p_pos.coords - centroid).normalize();
988 let op3_pos = p_pos + direction * 1.48;
989 residue.add_atom(Atom::new("OP3", Element::O, op3_pos));
990 residue
991 }
992
993 fn five_prime_residue_without_phosphate(id: i32) -> Residue {
994 let template = db::get_template("DA").unwrap();
995 let mut residue = Residue::new(
996 id,
997 None,
998 "DA",
999 Some(StandardResidue::DA),
1000 ResidueCategory::Standard,
1001 );
1002 residue.position = ResiduePosition::FivePrime;
1003 for (atom_name, element, pos) in template.heavy_atoms() {
1004 if !matches!(atom_name, "P" | "OP1" | "OP2") {
1005 residue.add_atom(Atom::new(atom_name, element, pos));
1006 }
1007 }
1008 residue
1009 }
1010
1011 fn three_prime_residue(id: i32) -> Residue {
1012 let template = db::get_template("DA").unwrap();
1013 let mut residue = Residue::new(
1014 id,
1015 None,
1016 "DA",
1017 Some(StandardResidue::DA),
1018 ResidueCategory::Standard,
1019 );
1020 residue.position = ResiduePosition::ThreePrime;
1021 for (atom_name, element, pos) in template.heavy_atoms() {
1022 residue.add_atom(Atom::new(atom_name, element, pos));
1023 }
1024 residue
1025 }
1026
1027 #[test]
1028 fn titratable_templates_exist_in_database() {
1029 let expected = [
1030 "ASP", "ASH", "GLU", "GLH", "LYS", "LYN", "ARG", "ARN", "CYS", "CYM", "TYR", "TYM",
1031 "HID", "HIE", "HIP",
1032 ];
1033
1034 for name in expected {
1035 assert!(
1036 db::get_template(name).is_some(),
1037 "template {name} should exist"
1038 );
1039 }
1040 }
1041
1042 #[test]
1043 fn determine_protonation_state_tracks_pka_thresholds() {
1044 let structure =
1045 structure_with_residue(residue_from_template("ASP", StandardResidue::ASP, 1));
1046 let mut config = HydroConfig {
1047 target_ph: Some(2.5),
1048 ..HydroConfig::default()
1049 };
1050 assert_eq!(
1051 determine_protonation_state(
1052 structure
1053 .iter_chains()
1054 .next()
1055 .unwrap()
1056 .iter_residues()
1057 .next()
1058 .unwrap(),
1059 &config,
1060 None,
1061 None
1062 ),
1063 Some("ASH".to_string())
1064 );
1065
1066 config.target_ph = Some(5.0);
1067 assert_eq!(
1068 determine_protonation_state(
1069 structure
1070 .iter_chains()
1071 .next()
1072 .unwrap()
1073 .iter_residues()
1074 .next()
1075 .unwrap(),
1076 &config,
1077 None,
1078 None
1079 ),
1080 Some("ASP".to_string())
1081 );
1082
1083 let structure =
1084 structure_with_residue(residue_from_template("LYS", StandardResidue::LYS, 2));
1085 config.target_ph = Some(11.0);
1086 assert_eq!(
1087 determine_protonation_state(
1088 structure
1089 .iter_chains()
1090 .next()
1091 .unwrap()
1092 .iter_residues()
1093 .next()
1094 .unwrap(),
1095 &config,
1096 None,
1097 None
1098 ),
1099 Some("LYN".to_string())
1100 );
1101
1102 config.target_ph = Some(7.0);
1103 assert_eq!(
1104 determine_protonation_state(
1105 structure
1106 .iter_chains()
1107 .next()
1108 .unwrap()
1109 .iter_residues()
1110 .next()
1111 .unwrap(),
1112 &config,
1113 None,
1114 None
1115 ),
1116 Some("LYS".to_string())
1117 );
1118 }
1119
1120 #[test]
1121 fn determine_protonation_state_respects_his_strategy() {
1122 let mut residue = residue_from_template("HID", StandardResidue::HIS, 3);
1123 residue.name = "HIS".into();
1124 let structure = structure_with_residue(residue);
1125
1126 let config = HydroConfig {
1127 target_ph: Some(7.0),
1128 his_strategy: HisStrategy::DirectHIE,
1129 ..HydroConfig::default()
1130 };
1131
1132 assert_eq!(
1133 determine_protonation_state(
1134 structure
1135 .iter_chains()
1136 .next()
1137 .unwrap()
1138 .iter_residues()
1139 .next()
1140 .unwrap(),
1141 &config,
1142 None,
1143 None
1144 ),
1145 Some("HIE".to_string())
1146 );
1147
1148 let mut acid_config = HydroConfig::default();
1149 acid_config.target_ph = Some(5.5);
1150 assert_eq!(
1151 determine_protonation_state(
1152 structure
1153 .iter_chains()
1154 .next()
1155 .unwrap()
1156 .iter_residues()
1157 .next()
1158 .unwrap(),
1159 &acid_config,
1160 None,
1161 None
1162 ),
1163 Some("HIP".to_string())
1164 );
1165 }
1166
1167 #[test]
1168 fn n_terminal_defaults_to_protonated_without_ph() {
1169 let residue = n_terminal_residue(40);
1170 let mut structure = structure_with_residue(residue);
1171
1172 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1173
1174 let residue = structure.find_residue("A", 40, None).unwrap();
1175 assert!(residue.has_atom("H1"));
1176 assert!(residue.has_atom("H2"));
1177 assert!(residue.has_atom("H3"));
1178 }
1179
1180 #[test]
1181 fn n_terminal_deprotonates_above_pka() {
1182 let residue = n_terminal_residue(41);
1183 let mut structure = structure_with_residue(residue);
1184 let mut config = HydroConfig::default();
1185 config.target_ph = Some(9.0);
1186
1187 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1188
1189 let residue = structure.find_residue("A", 41, None).unwrap();
1190 assert!(residue.has_atom("H1"));
1191 assert!(residue.has_atom("H2"));
1192 assert!(!residue.has_atom("H3"));
1193 }
1194
1195 #[test]
1196 fn c_terminal_protonates_under_acidic_ph() {
1197 let residue = c_terminal_residue(50);
1198 let mut structure = structure_with_residue(residue);
1199 let mut config = HydroConfig::default();
1200 config.target_ph = Some(2.5);
1201
1202 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1203
1204 let residue = structure.find_residue("A", 50, None).unwrap();
1205 assert!(residue.has_atom("HOXT"));
1206 }
1207
1208 #[test]
1209 fn c_terminal_remains_deprotonated_at_physiological_ph() {
1210 let residue = c_terminal_residue(51);
1211 let mut structure = structure_with_residue(residue);
1212
1213 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1214
1215 let residue = structure.find_residue("A", 51, None).unwrap();
1216 assert!(!residue.has_atom("HOXT"));
1217 }
1218
1219 #[test]
1220 fn determine_protonation_state_skips_already_marked_cyx() {
1221 let mut residue = residue_from_template("CYS", StandardResidue::CYS, 25);
1222 residue.name = "CYX".into();
1223 let structure = structure_with_residue(residue);
1224 let mut config = HydroConfig::default();
1225 config.target_ph = Some(9.0);
1226
1227 assert_eq!(
1228 determine_protonation_state(
1229 structure
1230 .iter_chains()
1231 .next()
1232 .unwrap()
1233 .iter_residues()
1234 .next()
1235 .unwrap(),
1236 &config,
1237 None,
1238 None
1239 ),
1240 None
1241 );
1242 }
1243
1244 #[test]
1245 fn add_hydrogens_populates_internal_lysine_side_chain() {
1246 let mut residue = residue_from_template("LYS", StandardResidue::LYS, 10);
1247 residue.add_atom(Atom::new("FAKE", Element::H, Point::origin()));
1248 let mut structure = structure_with_residue(residue);
1249
1250 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1251
1252 let residue = structure.find_residue("A", 10, None).unwrap();
1253 assert!(residue.has_atom("HZ1"));
1254 assert!(residue.has_atom("HZ2"));
1255 assert!(residue.has_atom("HZ3"));
1256 assert!(!residue.has_atom("FAKE"));
1257 }
1258
1259 #[test]
1260 fn add_hydrogens_relabels_asp_under_acidic_ph() {
1261 let residue = residue_from_template("ASP", StandardResidue::ASP, 15);
1262 let mut structure = structure_with_residue(residue);
1263 let mut config = HydroConfig::default();
1264 config.target_ph = Some(2.0);
1265
1266 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1267
1268 let residue = structure.find_residue("A", 15, None).unwrap();
1269 assert_eq!(residue.name, "ASH");
1270 assert!(residue.has_atom("HD2"));
1271 }
1272
1273 #[test]
1274 fn construct_hydrogens_errors_when_anchor_missing() {
1275 let template = db::get_template("ALA").expect("template ALA");
1276 let mut residue = Residue::new(
1277 20,
1278 None,
1279 "ALA",
1280 Some(StandardResidue::ALA),
1281 ResidueCategory::Standard,
1282 );
1283 residue.position = ResiduePosition::Internal;
1284
1285 let (name, element, pos) = template.heavy_atoms().next().unwrap();
1286 residue.add_atom(Atom::new(name, element, pos));
1287
1288 let err = construct_hydrogens_for_residue(&mut residue, &HydroConfig::default())
1289 .expect_err("should fail");
1290 assert!(matches!(err, Error::IncompleteResidueForHydro { .. }));
1291 }
1292
1293 #[test]
1294 fn close_cysteines_are_relabelled_to_cyx_and_skip_hydrogens() {
1295 let cys1 = residue_from_template("CYS", StandardResidue::CYS, 30);
1296 let mut cys2 = residue_from_template("CYS", StandardResidue::CYS, 31);
1297
1298 let sg1 = cys1.atom("SG").expect("SG in cys1").pos;
1299 let sg2 = cys2.atom("SG").expect("SG in cys2").pos;
1300 let desired = sg1 + Vector3::new(0.5, 0.0, 0.0);
1301 let offset = desired - sg2;
1302 for atom in cys2.iter_atoms_mut() {
1303 atom.translate_by(&offset);
1304 }
1305
1306 let mut structure = structure_with_residues(vec![cys1, cys2]);
1307 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation");
1308
1309 let res1 = structure.find_residue("A", 30, None).unwrap();
1310 let res2 = structure.find_residue("A", 31, None).unwrap();
1311 assert_eq!(res1.name, "CYX");
1312 assert_eq!(res2.name, "CYX");
1313 assert!(!res1.has_atom("HG"));
1314 assert!(!res2.has_atom("HG"));
1315 }
1316
1317 #[test]
1318 fn five_prime_phosphate_deprotonated_at_physiological_ph() {
1319 let residue = five_prime_residue_with_phosphate(60);
1320 let mut structure = structure_with_residue(residue);
1321
1322 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1323
1324 let residue = structure.find_residue("A", 60, None).unwrap();
1325 assert!(residue.has_atom("OP3"), "OP3 should remain");
1326 assert!(
1327 !residue.has_atom("HOP3"),
1328 "HOP3 should not exist at neutral pH"
1329 );
1330 assert!(
1331 !residue.has_atom("HOP2"),
1332 "HOP2 should not exist at neutral pH"
1333 );
1334 }
1335
1336 #[test]
1337 fn five_prime_phosphate_protonated_below_pka() {
1338 let residue = five_prime_residue_with_phosphate(61);
1339 let mut structure = structure_with_residue(residue);
1340 let mut config = HydroConfig::default();
1341 config.target_ph = Some(5.5);
1342
1343 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1344
1345 let residue = structure.find_residue("A", 61, None).unwrap();
1346 assert!(residue.has_atom("OP3"), "OP3 should remain");
1347 assert!(residue.has_atom("HOP3"), "HOP3 should be added below pKa");
1348 }
1349
1350 #[test]
1351 fn five_prime_without_phosphate_gets_ho5() {
1352 let residue = five_prime_residue_without_phosphate(62);
1353 let mut structure = structure_with_residue(residue);
1354
1355 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1356
1357 let residue = structure.find_residue("A", 62, None).unwrap();
1358 assert!(
1359 residue.has_atom("HO5'"),
1360 "HO5' should be added for 5'-OH terminus"
1361 );
1362 assert!(!residue.has_atom("P"), "phosphorus should not exist");
1363 }
1364
1365 #[test]
1366 fn three_prime_nucleic_gets_ho3() {
1367 let residue = three_prime_residue(70);
1368 let mut structure = structure_with_residue(residue);
1369
1370 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1371
1372 let residue = structure.find_residue("A", 70, None).unwrap();
1373 assert!(
1374 residue.has_atom("HO3'"),
1375 "HO3' should be added for 3' terminal"
1376 );
1377 }
1378}