1use crate::db;
8use crate::model::{
9 atom::Atom,
10 residue::Residue,
11 structure::Structure,
12 types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
13};
14use crate::ops::error::Error;
15use nalgebra::{Matrix3, Rotation3, Vector3};
16use rand::Rng;
17use std::collections::HashSet;
18
19const DISULFIDE_SG_THRESHOLD: f64 = 2.2;
21const N_TERM_PKA: f64 = 8.0;
23const C_TERM_PKA: f64 = 3.1;
25const PHOSPHATE_PKA2: f64 = 6.5;
27
28#[derive(Debug, Clone)]
33pub struct HydroConfig {
34 pub target_ph: Option<f64>,
36 pub remove_existing_h: bool,
38 pub his_strategy: HisStrategy,
40}
41
42impl Default for HydroConfig {
43 fn default() -> Self {
46 Self {
47 target_ph: None,
48 remove_existing_h: true,
49 his_strategy: HisStrategy::HbNetwork,
50 }
51 }
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub enum HisStrategy {
57 DirectHID,
59 DirectHIE,
61 Random,
63 HbNetwork,
65}
66
67pub fn add_hydrogens(structure: &mut Structure, config: &HydroConfig) -> Result<(), Error> {
87 let mut targets = Vec::new();
88 for (c_idx, chain) in structure.iter_chains().enumerate() {
89 for (r_idx, residue) in chain.iter_residues().enumerate() {
90 if residue.category == ResidueCategory::Standard {
91 targets.push((c_idx, r_idx));
92 }
93 }
94 }
95
96 mark_disulfide_bridges(structure);
97
98 for (c_idx, r_idx) in targets {
99 let new_name = determine_protonation_state(structure, c_idx, r_idx, config);
100
101 let chain = structure.iter_chains_mut().nth(c_idx).unwrap();
102 let residue = chain.iter_residues_mut().nth(r_idx).unwrap();
103
104 if let Some(name) = new_name {
105 residue.name = name;
106 }
107
108 if config.remove_existing_h {
109 residue.strip_hydrogens();
110 }
111
112 construct_hydrogens_for_residue(residue, config)?;
113 }
114
115 Ok(())
116}
117
118fn determine_protonation_state(
134 structure: &Structure,
135 c_idx: usize,
136 r_idx: usize,
137 config: &HydroConfig,
138) -> Option<String> {
139 let chain = structure.iter_chains().nth(c_idx)?;
140 let residue = chain.iter_residues().nth(r_idx)?;
141 let std = residue.standard_name?;
142
143 if std == StandardResidue::CYS && residue.name == "CYX" {
144 return None;
145 }
146
147 if let Some(ph) = config.target_ph {
148 return match std {
149 StandardResidue::ASP => Some(if ph < 3.9 {
150 "ASH".to_string()
151 } else {
152 "ASP".to_string()
153 }),
154 StandardResidue::GLU => Some(if ph < 4.2 {
155 "GLH".to_string()
156 } else {
157 "GLU".to_string()
158 }),
159 StandardResidue::LYS => Some(if ph > 10.5 {
160 "LYN".to_string()
161 } else {
162 "LYS".to_string()
163 }),
164 StandardResidue::ARG => Some(if ph > 12.5 {
165 "ARN".to_string()
166 } else {
167 "ARG".to_string()
168 }),
169 StandardResidue::CYS => Some(if ph > 8.3 {
170 "CYM".to_string()
171 } else {
172 "CYS".to_string()
173 }),
174 StandardResidue::TYR => Some(if ph > 10.0 {
175 "TYM".to_string()
176 } else {
177 "TYR".to_string()
178 }),
179 StandardResidue::HIS => {
180 if ph < 6.0 {
181 Some("HIP".to_string())
182 } else {
183 Some(select_neutral_his(structure, residue, &config.his_strategy))
184 }
185 }
186 _ => None,
187 };
188 }
189
190 if std == StandardResidue::HIS && matches!(residue.name.as_str(), "HIS" | "HID" | "HIE" | "HIP")
191 {
192 return Some(select_neutral_his(structure, residue, &config.his_strategy));
193 }
194
195 None
196}
197
198fn mark_disulfide_bridges(structure: &mut Structure) {
204 let mut cys_sulfurs = Vec::new();
205 for (c_idx, chain) in structure.iter_chains().enumerate() {
206 for (r_idx, residue) in chain.iter_residues().enumerate() {
207 if let (true, Some(sg)) = (
208 matches!(residue.standard_name, Some(StandardResidue::CYS)),
209 residue.atom("SG"),
210 ) {
211 cys_sulfurs.push((c_idx, r_idx, sg.pos));
212 }
213 }
214 }
215
216 if cys_sulfurs.len() < 2 {
217 return;
218 }
219
220 let threshold_sq = DISULFIDE_SG_THRESHOLD * DISULFIDE_SG_THRESHOLD;
221 let mut disulfide_residues: HashSet<(usize, usize)> = HashSet::new();
222 for i in 0..cys_sulfurs.len() {
223 for j in (i + 1)..cys_sulfurs.len() {
224 let (ci, ri, pos_i) = &cys_sulfurs[i];
225 let (cj, rj, pos_j) = &cys_sulfurs[j];
226 if (*pos_i - *pos_j).norm_squared() <= threshold_sq {
227 disulfide_residues.insert((*ci, *ri));
228 disulfide_residues.insert((*cj, *rj));
229 }
230 }
231 }
232
233 if disulfide_residues.is_empty() {
234 return;
235 }
236
237 for (c_idx, chain) in structure.iter_chains_mut().enumerate() {
238 for (r_idx, residue) in chain.iter_residues_mut().enumerate() {
239 if disulfide_residues.contains(&(c_idx, r_idx)) && residue.name != "CYX" {
240 residue.name = "CYX".to_string();
241 }
242 }
243 }
244}
245
246fn select_neutral_his(structure: &Structure, residue: &Residue, strategy: &HisStrategy) -> String {
258 match strategy {
259 HisStrategy::DirectHID => "HID".to_string(),
260 HisStrategy::DirectHIE => "HIE".to_string(),
261 HisStrategy::Random => {
262 let mut rng = rand::rng();
263 if rng.random_bool(0.5) {
264 "HID".to_string()
265 } else {
266 "HIE".to_string()
267 }
268 }
269 HisStrategy::HbNetwork => optimize_his_network(structure, residue),
270 }
271}
272
273fn optimize_his_network(structure: &Structure, residue: &Residue) -> String {
284 let nd1 = residue.atom("ND1");
285 let ne2 = residue.atom("NE2");
286
287 let has_acceptor_near = |atom: &Atom| -> bool {
288 for chain in structure.iter_chains() {
289 for other_res in chain.iter_residues() {
290 if other_res.id == residue.id && chain.id == "Unknown" {
291 continue;
292 }
293 if other_res == residue {
294 continue;
295 }
296
297 for other_atom in other_res.iter_atoms() {
298 if matches!(other_atom.element, Element::O | Element::N)
299 && atom.distance_squared(other_atom) < 3.5 * 3.5
300 {
301 return true;
302 }
303 }
304 }
305 }
306 false
307 };
308
309 let nd1_interaction = nd1.map(has_acceptor_near).unwrap_or(false);
310 let ne2_interaction = ne2.map(has_acceptor_near).unwrap_or(false);
311
312 match (nd1_interaction, ne2_interaction) {
313 (true, false) => "HID".to_string(),
314 (false, true) => "HIE".to_string(),
315 _ => "HID".to_string(),
316 }
317}
318
319fn construct_hydrogens_for_residue(
335 residue: &mut Residue,
336 config: &HydroConfig,
337) -> Result<(), Error> {
338 let template_name = residue.name.clone();
339
340 let template_view =
341 db::get_template(&template_name).ok_or_else(|| Error::MissingInternalTemplate {
342 res_name: template_name.clone(),
343 })?;
344
345 let existing_atoms: HashSet<String> = residue.atoms().iter().map(|a| a.name.clone()).collect();
346
347 for (h_name, h_tmpl_pos, anchors_iter) in template_view.hydrogens() {
348 if existing_atoms.contains(h_name) {
349 continue;
350 }
351
352 let anchors: Vec<&str> = anchors_iter.collect();
353 if let Ok(pos) = reconstruct_geometry(residue, h_tmpl_pos, &anchors) {
354 residue.add_atom(Atom::new(h_name, Element::H, pos));
355 } else {
356 return Err(Error::incomplete_for_hydro(
357 &residue.name,
358 residue.id,
359 anchors.first().copied().unwrap_or("?"),
360 ));
361 }
362 }
363
364 match residue.position {
365 ResiduePosition::NTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
366 construct_n_term_hydrogens(residue, n_term_should_be_protonated(config))?;
367 }
368 ResiduePosition::CTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
369 construct_c_term_hydrogen(residue, c_term_should_be_protonated(config))?;
370 }
371 ResiduePosition::ThreePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
372 construct_3_prime_hydrogen(residue)?;
373 }
374 ResiduePosition::FivePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
375 if residue.has_atom("P") {
376 construct_5_prime_phosphate_hydrogens(residue, config)?;
377 } else if residue.has_atom("O5'") {
378 construct_5_prime_hydrogen(residue)?;
379 }
380 }
381 _ => {}
382 }
383
384 Ok(())
385}
386
387fn n_term_should_be_protonated(config: &HydroConfig) -> bool {
397 config.target_ph.map(|ph| ph < N_TERM_PKA).unwrap_or(true)
398}
399
400fn c_term_should_be_protonated(config: &HydroConfig) -> bool {
410 config.target_ph.map(|ph| ph < C_TERM_PKA).unwrap_or(false)
411}
412
413fn reconstruct_geometry(
425 residue: &Residue,
426 target_tmpl_pos: Point,
427 anchor_names: &[&str],
428) -> Result<Point, ()> {
429 let template_view = db::get_template(&residue.name).ok_or(())?;
430
431 let mut residue_pts = Vec::new();
432 let mut template_pts = Vec::new();
433
434 for name in anchor_names {
435 let r_atom = residue.atom(name).ok_or(())?;
436 residue_pts.push(r_atom.pos);
437
438 let t_pos = template_view
439 .heavy_atoms()
440 .find(|(n, _, _)| n == name)
441 .map(|(_, _, p)| p)
442 .ok_or(())?;
443 template_pts.push(t_pos);
444 }
445
446 let (rot, trans) = calculate_transform(&residue_pts, &template_pts).ok_or(())?;
447
448 Ok(rot * target_tmpl_pos + trans)
449}
450
451fn construct_n_term_hydrogens(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
466 residue.remove_atom("H");
467 residue.remove_atom("H1");
468 residue.remove_atom("H2");
469 residue.remove_atom("H3");
470
471 let n_pos = residue
472 .atom("N")
473 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "N"))?
474 .pos;
475 let ca_pos = residue
476 .atom("CA")
477 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "CA"))?
478 .pos;
479
480 let v_ca_n = (n_pos - ca_pos).normalize();
481 let bond_len = 1.0;
482 let angle = 109.5_f64.to_radians();
483 let sin_a = angle.sin();
484 let cos_a = angle.cos();
485
486 let up = if v_ca_n.x.abs() < 0.9 {
487 Vector3::x()
488 } else {
489 Vector3::y()
490 };
491 let v_perp = v_ca_n.cross(&up).normalize();
492 let base_dir = v_ca_n.scale(cos_a) + v_perp.scale(sin_a);
493
494 let phases = [0.0_f64, 120.0, 240.0];
495 let mut candidates: Vec<Vector3<f64>> = phases
496 .iter()
497 .map(|deg| {
498 let rot_axis = Rotation3::from_axis_angle(
499 &nalgebra::Unit::new_normalize(v_ca_n),
500 deg.to_radians(),
501 );
502 rot_axis * base_dir
503 })
504 .collect();
505
506 candidates.sort_by(|a, b| {
507 a.dot(&v_ca_n)
508 .partial_cmp(&b.dot(&v_ca_n))
509 .unwrap_or(std::cmp::Ordering::Equal)
510 });
511
512 let target_count = if protonated { 3 } else { 2 };
513 let names = ["H1", "H2", "H3"];
514 for (idx, dir) in candidates.into_iter().take(target_count).enumerate() {
515 residue.add_atom(Atom::new(
516 names[idx],
517 Element::H,
518 n_pos + dir.scale(bond_len),
519 ));
520 }
521
522 Ok(())
523}
524
525fn construct_c_term_hydrogen(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
540 if !protonated {
541 residue.remove_atom("HOXT");
542 residue.remove_atom("HXT");
543 return Ok(());
544 }
545
546 residue.remove_atom("HXT");
547 if residue.has_atom("HOXT") {
548 return Ok(());
549 }
550
551 let c_pos = residue
552 .atom("C")
553 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C"))?
554 .pos;
555 let oxt_pos = residue
556 .atom("OXT")
557 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "OXT"))?
558 .pos;
559
560 let direction = oxt_pos - c_pos;
561 if direction.norm_squared() < 1e-6 {
562 return Err(Error::incomplete_for_hydro(
563 &residue.name,
564 residue.id,
565 "OXT",
566 ));
567 }
568 let dir = direction.normalize();
569 let h_pos = oxt_pos + dir.scale(0.97);
570
571 residue.add_atom(Atom::new("HOXT", Element::H, h_pos));
572 Ok(())
573}
574
575fn construct_3_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
589 if residue.has_atom("HO3'") {
590 return Ok(());
591 }
592
593 let o3 = residue
594 .atom("O3'")
595 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O3'"))?
596 .pos;
597 let c3 = residue
598 .atom("C3'")
599 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C3'"))?
600 .pos;
601 let c4 = residue
602 .atom("C4'")
603 .or_else(|| residue.atom("C2'"))
604 .map(|a| a.pos)
605 .unwrap_or_else(|| c3 + Vector3::x());
606
607 let v_c3_o3 = (o3 - c3).normalize();
608
609 let v_c4_c3 = (c3 - c4).normalize();
610 let normal = v_c3_o3.cross(&v_c4_c3).normalize();
611
612 let h_dir = (v_c3_o3 + normal).normalize();
613 let h_pos = o3 + h_dir.scale(0.96);
614
615 residue.add_atom(Atom::new("HO3'", Element::H, h_pos));
616 Ok(())
617}
618
619fn construct_5_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
633 if residue.has_atom("HO5'") {
634 return Ok(());
635 }
636
637 let o5 = residue
638 .atom("O5'")
639 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O5'"))?
640 .pos;
641 let c5 = residue
642 .atom("C5'")
643 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C5'"))?
644 .pos;
645
646 let v_c5_o5 = (o5 - c5).normalize();
647 let aux = if v_c5_o5.z.abs() < 0.9 {
648 Vector3::z()
649 } else {
650 Vector3::x()
651 };
652 let perp = v_c5_o5.cross(&aux).normalize();
653
654 let h_dir = (v_c5_o5 + perp).normalize();
655 let h_pos = o5 + h_dir.scale(0.96);
656
657 residue.add_atom(Atom::new("HO5'", Element::H, h_pos));
658 Ok(())
659}
660
661fn construct_5_prime_phosphate_hydrogens(
680 residue: &mut Residue,
681 config: &HydroConfig,
682) -> Result<(), Error> {
683 let ph = config.target_ph.unwrap_or(7.4);
684
685 if ph >= PHOSPHATE_PKA2 {
686 residue.remove_atom("HOP3");
687 residue.remove_atom("HOP2");
688 return Ok(());
689 }
690
691 if residue.has_atom("HOP3") {
692 return Ok(());
693 }
694
695 let op3 = residue
696 .atom("OP3")
697 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "OP3"))?
698 .pos;
699 let p = residue
700 .atom("P")
701 .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "P"))?
702 .pos;
703
704 let direction = (op3 - p).normalize();
705 let h_pos = op3 + direction * 0.96;
706
707 residue.add_atom(Atom::new("HOP3", Element::H, h_pos));
708 Ok(())
709}
710
711fn calculate_transform(r_pts: &[Point], t_pts: &[Point]) -> Option<(Matrix3<f64>, Vector3<f64>)> {
724 let n = r_pts.len();
725 if n != t_pts.len() || n == 0 {
726 return None;
727 }
728
729 let r_center = r_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
730 let t_center = t_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
731
732 if n == 1 {
733 return Some((Matrix3::identity(), r_center - t_center));
734 }
735
736 if n == 2 {
737 let v_r = r_pts[1] - r_pts[0];
738 let v_t = t_pts[1] - t_pts[0];
739 let rot = Rotation3::rotation_between(&v_t, &v_r).unwrap_or_else(Rotation3::identity);
740 let trans = r_center - rot * t_center;
741 return Some((rot.into_inner(), trans));
742 }
743
744 let mut cov = Matrix3::zeros();
745 for (p_r, p_t) in r_pts.iter().zip(t_pts.iter()) {
746 let v_r = p_r.coords - r_center;
747 let v_t = p_t.coords - t_center;
748 cov += v_r * v_t.transpose();
749 }
750
751 let svd = cov.svd(true, true);
752 let u = svd.u?;
753 let v_t = svd.v_t?;
754
755 let mut rot = u * v_t;
756 if rot.determinant() < 0.0 {
757 let mut corr = Matrix3::identity();
758 corr[(2, 2)] = -1.0;
759 rot = u * corr * v_t;
760 }
761
762 let trans = r_center - rot * t_center;
763 Some((rot, trans))
764}
765
766#[cfg(test)]
767mod tests {
768 use super::*;
769 use crate::model::{
770 atom::Atom,
771 chain::Chain,
772 residue::Residue,
773 types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
774 };
775
776 fn residue_from_template(name: &str, std: StandardResidue, id: i32) -> Residue {
777 let template = db::get_template(name).unwrap_or_else(|| panic!("missing template {name}"));
778 let mut residue = Residue::new(id, None, name, Some(std), ResidueCategory::Standard);
779 residue.position = ResiduePosition::Internal;
780 for (atom_name, element, pos) in template.heavy_atoms() {
781 residue.add_atom(Atom::new(atom_name, element, pos));
782 }
783 residue
784 }
785
786 fn structure_with_residue(residue: Residue) -> Structure {
787 let mut chain = Chain::new("A");
788 chain.add_residue(residue);
789 let mut structure = Structure::new();
790 structure.add_chain(chain);
791 structure
792 }
793
794 fn structure_with_residues(residues: Vec<Residue>) -> Structure {
795 let mut chain = Chain::new("A");
796 for residue in residues {
797 chain.add_residue(residue);
798 }
799 let mut structure = Structure::new();
800 structure.add_chain(chain);
801 structure
802 }
803
804 fn n_terminal_residue(id: i32) -> Residue {
805 let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
806 residue.position = ResiduePosition::NTerminal;
807 residue
808 }
809
810 fn c_terminal_residue(id: i32) -> Residue {
811 let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
812 residue.position = ResiduePosition::CTerminal;
813 let c_pos = residue.atom("C").expect("C atom").pos;
814 let o_pos = residue.atom("O").expect("O atom").pos;
815 let offset = c_pos - o_pos;
816 let oxt_pos = c_pos + offset;
817 residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
818 residue
819 }
820
821 fn five_prime_residue_with_phosphate(id: i32) -> Residue {
822 let template = db::get_template("DA").unwrap();
823 let mut residue = Residue::new(
824 id,
825 None,
826 "DA",
827 Some(StandardResidue::DA),
828 ResidueCategory::Standard,
829 );
830 residue.position = ResiduePosition::FivePrime;
831 for (atom_name, element, pos) in template.heavy_atoms() {
832 residue.add_atom(Atom::new(atom_name, element, pos));
833 }
834 let p_pos = residue.atom("P").unwrap().pos;
835 let op1_pos = residue.atom("OP1").unwrap().pos;
836 let op2_pos = residue.atom("OP2").unwrap().pos;
837 let o5_pos = residue.atom("O5'").unwrap().pos;
838 let centroid = (op1_pos.coords + op2_pos.coords + o5_pos.coords) / 3.0;
839 let direction = (p_pos.coords - centroid).normalize();
840 let op3_pos = p_pos + direction * 1.48;
841 residue.add_atom(Atom::new("OP3", Element::O, op3_pos));
842 residue
843 }
844
845 fn five_prime_residue_without_phosphate(id: i32) -> Residue {
846 let template = db::get_template("DA").unwrap();
847 let mut residue = Residue::new(
848 id,
849 None,
850 "DA",
851 Some(StandardResidue::DA),
852 ResidueCategory::Standard,
853 );
854 residue.position = ResiduePosition::FivePrime;
855 for (atom_name, element, pos) in template.heavy_atoms() {
856 if !matches!(atom_name, "P" | "OP1" | "OP2") {
857 residue.add_atom(Atom::new(atom_name, element, pos));
858 }
859 }
860 residue
861 }
862
863 fn three_prime_residue(id: i32) -> Residue {
864 let template = db::get_template("DA").unwrap();
865 let mut residue = Residue::new(
866 id,
867 None,
868 "DA",
869 Some(StandardResidue::DA),
870 ResidueCategory::Standard,
871 );
872 residue.position = ResiduePosition::ThreePrime;
873 for (atom_name, element, pos) in template.heavy_atoms() {
874 residue.add_atom(Atom::new(atom_name, element, pos));
875 }
876 residue
877 }
878
879 #[test]
880 fn titratable_templates_exist_in_database() {
881 let expected = [
882 "ASP", "ASH", "GLU", "GLH", "LYS", "LYN", "ARG", "ARN", "CYS", "CYM", "TYR", "TYM",
883 "HID", "HIE", "HIP",
884 ];
885
886 for name in expected {
887 assert!(
888 db::get_template(name).is_some(),
889 "template {name} should exist"
890 );
891 }
892 }
893
894 #[test]
895 fn determine_protonation_state_tracks_pka_thresholds() {
896 let structure =
897 structure_with_residue(residue_from_template("ASP", StandardResidue::ASP, 1));
898 let mut config = HydroConfig {
899 target_ph: Some(2.5),
900 ..HydroConfig::default()
901 };
902 assert_eq!(
903 determine_protonation_state(&structure, 0, 0, &config),
904 Some("ASH".to_string())
905 );
906
907 config.target_ph = Some(5.0);
908 assert_eq!(
909 determine_protonation_state(&structure, 0, 0, &config),
910 Some("ASP".to_string())
911 );
912
913 let structure =
914 structure_with_residue(residue_from_template("LYS", StandardResidue::LYS, 2));
915 config.target_ph = Some(11.0);
916 assert_eq!(
917 determine_protonation_state(&structure, 0, 0, &config),
918 Some("LYN".to_string())
919 );
920
921 config.target_ph = Some(7.0);
922 assert_eq!(
923 determine_protonation_state(&structure, 0, 0, &config),
924 Some("LYS".to_string())
925 );
926 }
927
928 #[test]
929 fn determine_protonation_state_respects_his_strategy() {
930 let mut residue = residue_from_template("HID", StandardResidue::HIS, 3);
931 residue.name = "HIS".to_string();
932 let structure = structure_with_residue(residue);
933
934 let config = HydroConfig {
935 target_ph: Some(7.0),
936 his_strategy: HisStrategy::DirectHIE,
937 ..HydroConfig::default()
938 };
939
940 assert_eq!(
941 determine_protonation_state(&structure, 0, 0, &config),
942 Some("HIE".to_string())
943 );
944
945 let mut acid_config = HydroConfig::default();
946 acid_config.target_ph = Some(5.5);
947 assert_eq!(
948 determine_protonation_state(&structure, 0, 0, &acid_config),
949 Some("HIP".to_string())
950 );
951 }
952
953 #[test]
954 fn n_terminal_defaults_to_protonated_without_ph() {
955 let residue = n_terminal_residue(40);
956 let mut structure = structure_with_residue(residue);
957
958 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
959
960 let residue = structure.find_residue("A", 40, None).unwrap();
961 assert!(residue.has_atom("H1"));
962 assert!(residue.has_atom("H2"));
963 assert!(residue.has_atom("H3"));
964 }
965
966 #[test]
967 fn n_terminal_deprotonates_above_pka() {
968 let residue = n_terminal_residue(41);
969 let mut structure = structure_with_residue(residue);
970 let mut config = HydroConfig::default();
971 config.target_ph = Some(9.0);
972
973 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
974
975 let residue = structure.find_residue("A", 41, None).unwrap();
976 assert!(residue.has_atom("H1"));
977 assert!(residue.has_atom("H2"));
978 assert!(!residue.has_atom("H3"));
979 }
980
981 #[test]
982 fn c_terminal_protonates_under_acidic_ph() {
983 let residue = c_terminal_residue(50);
984 let mut structure = structure_with_residue(residue);
985 let mut config = HydroConfig::default();
986 config.target_ph = Some(2.5);
987
988 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
989
990 let residue = structure.find_residue("A", 50, None).unwrap();
991 assert!(residue.has_atom("HOXT"));
992 }
993
994 #[test]
995 fn c_terminal_remains_deprotonated_at_physiological_ph() {
996 let residue = c_terminal_residue(51);
997 let mut structure = structure_with_residue(residue);
998
999 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1000
1001 let residue = structure.find_residue("A", 51, None).unwrap();
1002 assert!(!residue.has_atom("HOXT"));
1003 }
1004
1005 #[test]
1006 fn determine_protonation_state_skips_already_marked_cyx() {
1007 let mut residue = residue_from_template("CYS", StandardResidue::CYS, 25);
1008 residue.name = "CYX".to_string();
1009 let structure = structure_with_residue(residue);
1010 let mut config = HydroConfig::default();
1011 config.target_ph = Some(9.0);
1012
1013 assert_eq!(determine_protonation_state(&structure, 0, 0, &config), None);
1014 }
1015
1016 #[test]
1017 fn add_hydrogens_populates_internal_lysine_side_chain() {
1018 let mut residue = residue_from_template("LYS", StandardResidue::LYS, 10);
1019 residue.add_atom(Atom::new("FAKE", Element::H, Point::origin()));
1020 let mut structure = structure_with_residue(residue);
1021
1022 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1023
1024 let residue = structure.find_residue("A", 10, None).unwrap();
1025 assert!(residue.has_atom("HZ1"));
1026 assert!(residue.has_atom("HZ2"));
1027 assert!(residue.has_atom("HZ3"));
1028 assert!(!residue.has_atom("FAKE"));
1029 }
1030
1031 #[test]
1032 fn add_hydrogens_relabels_asp_under_acidic_ph() {
1033 let residue = residue_from_template("ASP", StandardResidue::ASP, 15);
1034 let mut structure = structure_with_residue(residue);
1035 let mut config = HydroConfig::default();
1036 config.target_ph = Some(2.0);
1037
1038 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1039
1040 let residue = structure.find_residue("A", 15, None).unwrap();
1041 assert_eq!(residue.name, "ASH");
1042 assert!(residue.has_atom("HD2"));
1043 }
1044
1045 #[test]
1046 fn construct_hydrogens_errors_when_anchor_missing() {
1047 let template = db::get_template("ALA").expect("template ALA");
1048 let mut residue = Residue::new(
1049 20,
1050 None,
1051 "ALA",
1052 Some(StandardResidue::ALA),
1053 ResidueCategory::Standard,
1054 );
1055 residue.position = ResiduePosition::Internal;
1056
1057 let (name, element, pos) = template.heavy_atoms().next().unwrap();
1058 residue.add_atom(Atom::new(name, element, pos));
1059
1060 let err = construct_hydrogens_for_residue(&mut residue, &HydroConfig::default())
1061 .expect_err("should fail");
1062 assert!(matches!(err, Error::IncompleteResidueForHydro { .. }));
1063 }
1064
1065 #[test]
1066 fn close_cysteines_are_relabelled_to_cyx_and_skip_hydrogens() {
1067 let cys1 = residue_from_template("CYS", StandardResidue::CYS, 30);
1068 let mut cys2 = residue_from_template("CYS", StandardResidue::CYS, 31);
1069
1070 let sg1 = cys1.atom("SG").expect("SG in cys1").pos;
1071 let sg2 = cys2.atom("SG").expect("SG in cys2").pos;
1072 let desired = sg1 + Vector3::new(0.5, 0.0, 0.0);
1073 let offset = desired - sg2;
1074 for atom in cys2.iter_atoms_mut() {
1075 atom.translate_by(&offset);
1076 }
1077
1078 let mut structure = structure_with_residues(vec![cys1, cys2]);
1079 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation");
1080
1081 let res1 = structure.find_residue("A", 30, None).unwrap();
1082 let res2 = structure.find_residue("A", 31, None).unwrap();
1083 assert_eq!(res1.name, "CYX");
1084 assert_eq!(res2.name, "CYX");
1085 assert!(!res1.has_atom("HG"));
1086 assert!(!res2.has_atom("HG"));
1087 }
1088
1089 #[test]
1090 fn five_prime_phosphate_deprotonated_at_physiological_ph() {
1091 let residue = five_prime_residue_with_phosphate(60);
1092 let mut structure = structure_with_residue(residue);
1093
1094 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1095
1096 let residue = structure.find_residue("A", 60, None).unwrap();
1097 assert!(residue.has_atom("OP3"), "OP3 should remain");
1098 assert!(
1099 !residue.has_atom("HOP3"),
1100 "HOP3 should not exist at neutral pH"
1101 );
1102 assert!(
1103 !residue.has_atom("HOP2"),
1104 "HOP2 should not exist at neutral pH"
1105 );
1106 }
1107
1108 #[test]
1109 fn five_prime_phosphate_protonated_below_pka() {
1110 let residue = five_prime_residue_with_phosphate(61);
1111 let mut structure = structure_with_residue(residue);
1112 let mut config = HydroConfig::default();
1113 config.target_ph = Some(5.5);
1114
1115 add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1116
1117 let residue = structure.find_residue("A", 61, None).unwrap();
1118 assert!(residue.has_atom("OP3"), "OP3 should remain");
1119 assert!(residue.has_atom("HOP3"), "HOP3 should be added below pKa");
1120 }
1121
1122 #[test]
1123 fn five_prime_without_phosphate_gets_ho5() {
1124 let residue = five_prime_residue_without_phosphate(62);
1125 let mut structure = structure_with_residue(residue);
1126
1127 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1128
1129 let residue = structure.find_residue("A", 62, None).unwrap();
1130 assert!(
1131 residue.has_atom("HO5'"),
1132 "HO5' should be added for 5'-OH terminus"
1133 );
1134 assert!(!residue.has_atom("P"), "phosphorus should not exist");
1135 }
1136
1137 #[test]
1138 fn three_prime_nucleic_gets_ho3() {
1139 let residue = three_prime_residue(70);
1140 let mut structure = structure_with_residue(residue);
1141
1142 add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1143
1144 let residue = structure.find_residue("A", 70, None).unwrap();
1145 assert!(
1146 residue.has_atom("HO3'"),
1147 "HO3' should be added for 3' terminal"
1148 );
1149 }
1150}