bio_forge/ops/
hydro.rs

1//! Protonation-aware hydrogen placement for protein and nucleic acid structures.
2//!
3//! This module inspects residue templates, predicts protonation states from pH or hydrogen
4//! bonding networks, relabels residues accordingly, and rebuilds missing hydrogens while
5//! respecting polymer termini, nucleic acid priming, and disulfide bridges.
6
7use crate::db;
8use crate::model::{
9    atom::Atom,
10    residue::Residue,
11    structure::Structure,
12    types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
13};
14use crate::ops::error::Error;
15use nalgebra::{Matrix3, Rotation3, Vector3};
16use rand::Rng;
17use std::collections::HashSet;
18
19/// Maximum sulfur–sulfur distance (Å) used to detect disulfide bridges.
20const DISULFIDE_SG_THRESHOLD: f64 = 2.2;
21/// Henderson–Hasselbalch breakpoint for protonated N-termini.
22const N_TERM_PKA: f64 = 8.0;
23/// Henderson–Hasselbalch breakpoint for protonated C-termini.
24const C_TERM_PKA: f64 = 3.1;
25
26/// Parameters controlling hydrogen addition behavior.
27///
28/// `HydroConfig` can target a specific solution pH, remove pre-existing hydrogens, and
29/// choose how neutral histidine tautomers are assigned.
30#[derive(Debug, Clone)]
31pub struct HydroConfig {
32    /// Optional solvent pH value used for titration decisions.
33    pub target_ph: Option<f64>,
34    /// Whether to strip all existing hydrogens before reconstruction.
35    pub remove_existing_h: bool,
36    /// Strategy for setting neutral histidine tautomer labels.
37    pub his_strategy: HisStrategy,
38}
39
40impl Default for HydroConfig {
41    /// Provides biologically reasonable defaults (physiological pH, removal of old hydrogens,
42    /// and hydrogen-bond-aware histidine selection).
43    fn default() -> Self {
44        Self {
45            target_ph: None,
46            remove_existing_h: true,
47            his_strategy: HisStrategy::HbNetwork,
48        }
49    }
50}
51
52/// Strategies for choosing neutral histidine labels.
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub enum HisStrategy {
55    /// Force all neutral histidines to HID (delta-protonated).
56    DirectHID,
57    /// Force all neutral histidines to HIE (epsilon-protonated).
58    DirectHIE,
59    /// Randomly choose between HID and HIE.
60    Random,
61    /// Analyze hydrogen-bond networks to select the tautomer most likely to bond.
62    HbNetwork,
63}
64
65/// Adds hydrogens to all standard residues in-place, updating protonation states when needed.
66///
67/// Disulfide bridges are detected prior to titration and preserved by relabeling cysteines to
68/// `CYX`. Residues lacking required anchor atoms will trigger descriptive errors so upstream
69/// cleanup steps can correct the issues.
70///
71/// # Arguments
72///
73/// * `structure` - Mutable structure whose residues will be protonated and hydrated.
74/// * `config` - Hydrogenation configuration, including pH target and histidine strategy.
75///
76/// # Returns
77///
78/// `Ok(())` when hydrogenation succeeds.
79///
80/// # Errors
81///
82/// Returns [`Error::MissingInternalTemplate`] when no template is found or
83/// [`Error::IncompleteResidueForHydro`] when required anchor atoms are missing.
84pub fn add_hydrogens(structure: &mut Structure, config: &HydroConfig) -> Result<(), Error> {
85    let mut targets = Vec::new();
86    for (c_idx, chain) in structure.iter_chains().enumerate() {
87        for (r_idx, residue) in chain.iter_residues().enumerate() {
88            if residue.category == ResidueCategory::Standard {
89                targets.push((c_idx, r_idx));
90            }
91        }
92    }
93
94    mark_disulfide_bridges(structure);
95
96    for (c_idx, r_idx) in targets {
97        let new_name = determine_protonation_state(structure, c_idx, r_idx, config);
98
99        let chain = structure.iter_chains_mut().nth(c_idx).unwrap();
100        let residue = chain.iter_residues_mut().nth(r_idx).unwrap();
101
102        if let Some(name) = new_name {
103            residue.name = name;
104        }
105
106        if config.remove_existing_h {
107            residue.strip_hydrogens();
108        }
109
110        construct_hydrogens_for_residue(residue, config)?;
111    }
112
113    Ok(())
114}
115
116/// Predicts the protonation-induced residue rename for a given polymer residue.
117///
118/// Applies residue-specific pKa thresholds, handles histidine tautomer selection, and
119/// respects previously tagged disulfide cysteines.
120///
121/// # Arguments
122///
123/// * `structure` - Structure providing context for hydrogen-bond analysis.
124/// * `c_idx` - Chain index of the residue under evaluation.
125/// * `r_idx` - Residue index within the chain.
126/// * `config` - Hydrogenation configuration containing pH and histidine options.
127///
128/// # Returns
129///
130/// `Some(new_name)` when the residue should be relabeled; otherwise `None`.
131fn determine_protonation_state(
132    structure: &Structure,
133    c_idx: usize,
134    r_idx: usize,
135    config: &HydroConfig,
136) -> Option<String> {
137    let chain = structure.iter_chains().nth(c_idx)?;
138    let residue = chain.iter_residues().nth(r_idx)?;
139    let std = residue.standard_name?;
140
141    if std == StandardResidue::CYS && residue.name == "CYX" {
142        return None;
143    }
144
145    if let Some(ph) = config.target_ph {
146        return match std {
147            StandardResidue::ASP => Some(if ph < 3.9 {
148                "ASH".to_string()
149            } else {
150                "ASP".to_string()
151            }),
152            StandardResidue::GLU => Some(if ph < 4.2 {
153                "GLH".to_string()
154            } else {
155                "GLU".to_string()
156            }),
157            StandardResidue::LYS => Some(if ph > 10.5 {
158                "LYN".to_string()
159            } else {
160                "LYS".to_string()
161            }),
162            StandardResidue::ARG => Some(if ph > 12.5 {
163                "ARN".to_string()
164            } else {
165                "ARG".to_string()
166            }),
167            StandardResidue::CYS => Some(if ph > 8.3 {
168                "CYM".to_string()
169            } else {
170                "CYS".to_string()
171            }),
172            StandardResidue::TYR => Some(if ph > 10.0 {
173                "TYM".to_string()
174            } else {
175                "TYR".to_string()
176            }),
177            StandardResidue::HIS => {
178                if ph < 6.0 {
179                    Some("HIP".to_string())
180                } else {
181                    Some(select_neutral_his(structure, residue, &config.his_strategy))
182                }
183            }
184            _ => None,
185        };
186    }
187
188    if std == StandardResidue::HIS && matches!(residue.name.as_str(), "HIS" | "HID" | "HIE" | "HIP")
189    {
190        return Some(select_neutral_his(structure, residue, &config.his_strategy));
191    }
192
193    None
194}
195
196/// Identifies cysteine pairs forming disulfide bonds and renames them to `CYX`.
197///
198/// # Arguments
199///
200/// * `structure` - Mutable structure containing residues to scan and relabel.
201fn mark_disulfide_bridges(structure: &mut Structure) {
202    let mut cys_sulfurs = Vec::new();
203    for (c_idx, chain) in structure.iter_chains().enumerate() {
204        for (r_idx, residue) in chain.iter_residues().enumerate() {
205            if let (true, Some(sg)) = (
206                matches!(residue.standard_name, Some(StandardResidue::CYS)),
207                residue.atom("SG"),
208            ) {
209                cys_sulfurs.push((c_idx, r_idx, sg.pos));
210            }
211        }
212    }
213
214    if cys_sulfurs.len() < 2 {
215        return;
216    }
217
218    let threshold_sq = DISULFIDE_SG_THRESHOLD * DISULFIDE_SG_THRESHOLD;
219    let mut disulfide_residues: HashSet<(usize, usize)> = HashSet::new();
220    for i in 0..cys_sulfurs.len() {
221        for j in (i + 1)..cys_sulfurs.len() {
222            let (ci, ri, pos_i) = &cys_sulfurs[i];
223            let (cj, rj, pos_j) = &cys_sulfurs[j];
224            if (*pos_i - *pos_j).norm_squared() <= threshold_sq {
225                disulfide_residues.insert((*ci, *ri));
226                disulfide_residues.insert((*cj, *rj));
227            }
228        }
229    }
230
231    if disulfide_residues.is_empty() {
232        return;
233    }
234
235    for (c_idx, chain) in structure.iter_chains_mut().enumerate() {
236        for (r_idx, residue) in chain.iter_residues_mut().enumerate() {
237            if disulfide_residues.contains(&(c_idx, r_idx)) && residue.name != "CYX" {
238                residue.name = "CYX".to_string();
239            }
240        }
241    }
242}
243
244/// Selects a neutral histidine tautomer using the configured strategy.
245///
246/// # Arguments
247///
248/// * `structure` - Structure used when hydrogen-bond networks are considered.
249/// * `residue` - Histidine residue being relabeled.
250/// * `strategy` - Selection strategy from [`HisStrategy`].
251///
252/// # Returns
253///
254/// Either `"HID"` or `"HIE"`.
255fn select_neutral_his(structure: &Structure, residue: &Residue, strategy: &HisStrategy) -> String {
256    match strategy {
257        HisStrategy::DirectHID => "HID".to_string(),
258        HisStrategy::DirectHIE => "HIE".to_string(),
259        HisStrategy::Random => {
260            let mut rng = rand::rng();
261            if rng.random_bool(0.5) {
262                "HID".to_string()
263            } else {
264                "HIE".to_string()
265            }
266        }
267        HisStrategy::HbNetwork => optimize_his_network(structure, residue),
268    }
269}
270
271/// Determines the best histidine tautomer by inspecting nearby hydrogen-bond acceptors.
272///
273/// # Arguments
274///
275/// * `structure` - Complete structure used to search for acceptor atoms.
276/// * `residue` - Histidine residue being evaluated.
277///
278/// # Returns
279///
280/// The histidine label (`"HID"` or `"HIE"`) that maximizes compatibility with neighbors.
281fn optimize_his_network(structure: &Structure, residue: &Residue) -> String {
282    let nd1 = residue.atom("ND1");
283    let ne2 = residue.atom("NE2");
284
285    let has_acceptor_near = |atom: &Atom| -> bool {
286        for chain in structure.iter_chains() {
287            for other_res in chain.iter_residues() {
288                if other_res.id == residue.id && chain.id == "Unknown" {
289                    continue;
290                }
291                if other_res == residue {
292                    continue;
293                }
294
295                for other_atom in other_res.iter_atoms() {
296                    if matches!(other_atom.element, Element::O | Element::N)
297                        && atom.distance_squared(other_atom) < 3.5 * 3.5
298                    {
299                        return true;
300                    }
301                }
302            }
303        }
304        false
305    };
306
307    let nd1_interaction = nd1.map(has_acceptor_near).unwrap_or(false);
308    let ne2_interaction = ne2.map(has_acceptor_near).unwrap_or(false);
309
310    match (nd1_interaction, ne2_interaction) {
311        (true, false) => "HID".to_string(),
312        (false, true) => "HIE".to_string(),
313        _ => "HID".to_string(),
314    }
315}
316
317/// Rebuilds hydrogens for a single residue using template geometry and terminal rules.
318///
319/// # Arguments
320///
321/// * `residue` - Residue to augment with hydrogens.
322/// * `config` - Hydrogenation configuration influencing terminal protonation.
323///
324/// # Returns
325///
326/// `Ok(())` when all hydrogens were added or already present.
327///
328/// # Errors
329///
330/// Returns [`Error::MissingInternalTemplate`] or [`Error::IncompleteResidueForHydro`] when
331/// required template data or anchor atoms are missing.
332fn construct_hydrogens_for_residue(
333    residue: &mut Residue,
334    config: &HydroConfig,
335) -> Result<(), Error> {
336    let template_name = residue.name.clone();
337
338    let template_view =
339        db::get_template(&template_name).ok_or_else(|| Error::MissingInternalTemplate {
340            res_name: template_name.clone(),
341        })?;
342
343    let existing_atoms: HashSet<String> = residue.atoms().iter().map(|a| a.name.clone()).collect();
344
345    for (h_name, h_tmpl_pos, anchors_iter) in template_view.hydrogens() {
346        if existing_atoms.contains(h_name) {
347            continue;
348        }
349
350        let anchors: Vec<&str> = anchors_iter.collect();
351        if let Ok(pos) = reconstruct_geometry(residue, h_tmpl_pos, &anchors) {
352            residue.add_atom(Atom::new(h_name, Element::H, pos));
353        } else {
354            return Err(Error::incomplete_for_hydro(
355                &residue.name,
356                residue.id,
357                anchors.first().copied().unwrap_or("?"),
358            ));
359        }
360    }
361
362    match residue.position {
363        ResiduePosition::NTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
364            construct_n_term_hydrogens(residue, n_term_should_be_protonated(config))?;
365        }
366        ResiduePosition::CTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
367            construct_c_term_hydrogen(residue, c_term_should_be_protonated(config))?;
368        }
369        ResiduePosition::ThreePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
370            construct_3_prime_hydrogen(residue)?;
371        }
372        ResiduePosition::FivePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
373            if !residue.has_atom("P") && residue.has_atom("O5'") {
374                construct_5_prime_hydrogen(residue)?;
375            }
376        }
377        _ => {}
378    }
379
380    Ok(())
381}
382
383/// Evaluates whether an N-terminus should remain protonated under the configured pH.
384///
385/// # Arguments
386///
387/// * `config` - Hydrogenation configuration that may provide a target pH.
388///
389/// # Returns
390///
391/// `true` when the pH is below the configured threshold or unspecified.
392fn n_term_should_be_protonated(config: &HydroConfig) -> bool {
393    config.target_ph.map(|ph| ph < N_TERM_PKA).unwrap_or(true)
394}
395
396/// Evaluates whether a C-terminus should remain protonated under the configured pH.
397///
398/// # Arguments
399///
400/// * `config` - Hydrogenation configuration potentially specifying pH.
401///
402/// # Returns
403///
404/// `true` when the pH is below the acidic cutoff; otherwise `false`.
405fn c_term_should_be_protonated(config: &HydroConfig) -> bool {
406    config.target_ph.map(|ph| ph < C_TERM_PKA).unwrap_or(false)
407}
408
409/// Aligns template coordinates to residue anchors to predict a hydrogen position.
410///
411/// # Arguments
412///
413/// * `residue` - Residue containing measured anchor atoms.
414/// * `target_tmpl_pos` - Target hydrogen position from the template.
415/// * `anchor_names` - Atom names used to determine the rigid-body transform.
416///
417/// # Returns
418///
419/// `Ok(point)` containing the placed coordinate or `Err(())` if anchors are missing.
420fn reconstruct_geometry(
421    residue: &Residue,
422    target_tmpl_pos: Point,
423    anchor_names: &[&str],
424) -> Result<Point, ()> {
425    let template_view = db::get_template(&residue.name).ok_or(())?;
426
427    let mut residue_pts = Vec::new();
428    let mut template_pts = Vec::new();
429
430    for name in anchor_names {
431        let r_atom = residue.atom(name).ok_or(())?;
432        residue_pts.push(r_atom.pos);
433
434        let t_pos = template_view
435            .heavy_atoms()
436            .find(|(n, _, _)| n == name)
437            .map(|(_, _, p)| p)
438            .ok_or(())?;
439        template_pts.push(t_pos);
440    }
441
442    let (rot, trans) = calculate_transform(&residue_pts, &template_pts).ok_or(())?;
443
444    Ok(rot * target_tmpl_pos + trans)
445}
446
447/// Rebuilds the N-terminal amine hydrogens using tetrahedral geometry.
448///
449/// # Arguments
450///
451/// * `residue` - Residue whose terminal hydrogens are being reconstructed.
452/// * `protonated` - Whether to place three hydrogens (`true`) or two (`false`).
453///
454/// # Returns
455///
456/// `Ok(())` when hydrogens are successfully placed.
457///
458/// # Errors
459///
460/// Returns [`Error::IncompleteResidueForHydro`] if the N or CA atoms are missing.
461fn construct_n_term_hydrogens(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
462    residue.remove_atom("H");
463    residue.remove_atom("H1");
464    residue.remove_atom("H2");
465    residue.remove_atom("H3");
466
467    let n_pos = residue
468        .atom("N")
469        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "N"))?
470        .pos;
471    let ca_pos = residue
472        .atom("CA")
473        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "CA"))?
474        .pos;
475
476    let v_ca_n = (n_pos - ca_pos).normalize();
477    let bond_len = 1.0;
478    let angle = 109.5_f64.to_radians();
479    let sin_a = angle.sin();
480    let cos_a = angle.cos();
481
482    let up = if v_ca_n.x.abs() < 0.9 {
483        Vector3::x()
484    } else {
485        Vector3::y()
486    };
487    let v_perp = v_ca_n.cross(&up).normalize();
488    let base_dir = v_ca_n.scale(cos_a) + v_perp.scale(sin_a);
489
490    let phases = [0.0_f64, 120.0, 240.0];
491    let mut candidates: Vec<Vector3<f64>> = phases
492        .iter()
493        .map(|deg| {
494            let rot_axis = Rotation3::from_axis_angle(
495                &nalgebra::Unit::new_normalize(v_ca_n),
496                deg.to_radians(),
497            );
498            rot_axis * base_dir
499        })
500        .collect();
501
502    candidates.sort_by(|a, b| {
503        a.dot(&v_ca_n)
504            .partial_cmp(&b.dot(&v_ca_n))
505            .unwrap_or(std::cmp::Ordering::Equal)
506    });
507
508    let target_count = if protonated { 3 } else { 2 };
509    let names = ["H1", "H2", "H3"];
510    for (idx, dir) in candidates.into_iter().take(target_count).enumerate() {
511        residue.add_atom(Atom::new(
512            names[idx],
513            Element::H,
514            n_pos + dir.scale(bond_len),
515        ));
516    }
517
518    Ok(())
519}
520
521/// Rebuilds the carboxylate proton at the C-terminus when protonated.
522///
523/// # Arguments
524///
525/// * `residue` - Residue to modify.
526/// * `protonated` - Whether the terminus should include the `HOXT` hydrogen.
527///
528/// # Returns
529///
530/// `Ok(())` after either removing or adding hydrogens as needed.
531///
532/// # Errors
533///
534/// Returns [`Error::IncompleteResidueForHydro`] if the `C` or `OXT` atoms are missing.
535fn construct_c_term_hydrogen(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
536    if !protonated {
537        residue.remove_atom("HOXT");
538        residue.remove_atom("HXT");
539        return Ok(());
540    }
541
542    residue.remove_atom("HXT");
543    if residue.has_atom("HOXT") {
544        return Ok(());
545    }
546
547    let c_pos = residue
548        .atom("C")
549        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C"))?
550        .pos;
551    let oxt_pos = residue
552        .atom("OXT")
553        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "OXT"))?
554        .pos;
555
556    let direction = oxt_pos - c_pos;
557    if direction.norm_squared() < 1e-6 {
558        return Err(Error::incomplete_for_hydro(
559            &residue.name,
560            residue.id,
561            "OXT",
562        ));
563    }
564    let dir = direction.normalize();
565    let h_pos = oxt_pos + dir.scale(0.97);
566
567    residue.add_atom(Atom::new("HOXT", Element::H, h_pos));
568    Ok(())
569}
570
571/// Adds the 3'-terminal hydroxyl hydrogen to nucleic acid residues when absent.
572///
573/// # Arguments
574///
575/// * `residue` - Residue representing a nucleic acid terminal unit.
576///
577/// # Returns
578///
579/// `Ok(())` when the hydrogen is added or already present.
580///
581/// # Errors
582///
583/// Returns [`Error::IncompleteResidueForHydro`] if required sugar atoms are missing.
584fn construct_3_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
585    if residue.has_atom("HO3'") {
586        return Ok(());
587    }
588
589    let o3 = residue
590        .atom("O3'")
591        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O3'"))?
592        .pos;
593    let c3 = residue
594        .atom("C3'")
595        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C3'"))?
596        .pos;
597    let c4 = residue
598        .atom("C4'")
599        .or_else(|| residue.atom("C2'"))
600        .map(|a| a.pos)
601        .unwrap_or_else(|| c3 + Vector3::x());
602
603    let v_c3_o3 = (o3 - c3).normalize();
604
605    let v_c4_c3 = (c3 - c4).normalize();
606    let normal = v_c3_o3.cross(&v_c4_c3).normalize();
607
608    let h_dir = (v_c3_o3 + normal).normalize();
609    let h_pos = o3 + h_dir.scale(0.96);
610
611    residue.add_atom(Atom::new("HO3'", Element::H, h_pos));
612    Ok(())
613}
614
615/// Adds the 5'-terminal hydroxyl hydrogen for nucleic acid residues lacking phosphates.
616///
617/// # Arguments
618///
619/// * `residue` - Residue whose 5' terminus is being hydrated.
620///
621/// # Returns
622///
623/// `Ok(())` when the hydrogen is added or already exists.
624///
625/// # Errors
626///
627/// Returns [`Error::IncompleteResidueForHydro`] if sugar anchor atoms are missing.
628fn construct_5_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
629    if residue.has_atom("HO5'") {
630        return Ok(());
631    }
632
633    let o5 = residue
634        .atom("O5'")
635        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O5'"))?
636        .pos;
637    let c5 = residue
638        .atom("C5'")
639        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C5'"))?
640        .pos;
641
642    let v_c5_o5 = (o5 - c5).normalize();
643    let aux = if v_c5_o5.z.abs() < 0.9 {
644        Vector3::z()
645    } else {
646        Vector3::x()
647    };
648    let perp = v_c5_o5.cross(&aux).normalize();
649
650    let h_dir = (v_c5_o5 + perp).normalize();
651    let h_pos = o5 + h_dir.scale(0.96);
652
653    residue.add_atom(Atom::new("HO5'", Element::H, h_pos));
654    Ok(())
655}
656
657/// Computes the optimal rigid transform mapping template anchor points to residue atoms.
658///
659/// Uses Kabsch alignment with safeguards for one- and two-point configurations.
660///
661/// # Arguments
662///
663/// * `r_pts` - Coordinates from the residue.
664/// * `t_pts` - Corresponding coordinates from the template.
665///
666/// # Returns
667///
668/// `Some((rotation, translation))` when alignment succeeds; otherwise `None`.
669fn calculate_transform(r_pts: &[Point], t_pts: &[Point]) -> Option<(Matrix3<f64>, Vector3<f64>)> {
670    let n = r_pts.len();
671    if n != t_pts.len() || n == 0 {
672        return None;
673    }
674
675    let r_center = r_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
676    let t_center = t_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
677
678    if n == 1 {
679        return Some((Matrix3::identity(), r_center - t_center));
680    }
681
682    if n == 2 {
683        let v_r = r_pts[1] - r_pts[0];
684        let v_t = t_pts[1] - t_pts[0];
685        let rot = Rotation3::rotation_between(&v_t, &v_r).unwrap_or_else(Rotation3::identity);
686        let trans = r_center - rot * t_center;
687        return Some((rot.into_inner(), trans));
688    }
689
690    let mut cov = Matrix3::zeros();
691    for (p_r, p_t) in r_pts.iter().zip(t_pts.iter()) {
692        let v_r = p_r.coords - r_center;
693        let v_t = p_t.coords - t_center;
694        cov += v_r * v_t.transpose();
695    }
696
697    let svd = cov.svd(true, true);
698    let u = svd.u?;
699    let v_t = svd.v_t?;
700
701    let mut rot = u * v_t;
702    if rot.determinant() < 0.0 {
703        let mut corr = Matrix3::identity();
704        corr[(2, 2)] = -1.0;
705        rot = u * corr * v_t;
706    }
707
708    let trans = r_center - rot * t_center;
709    Some((rot, trans))
710}
711
712#[cfg(test)]
713mod tests {
714    use super::*;
715    use crate::model::{
716        atom::Atom,
717        chain::Chain,
718        residue::Residue,
719        types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
720    };
721
722    fn residue_from_template(name: &str, std: StandardResidue, id: i32) -> Residue {
723        let template = db::get_template(name).unwrap_or_else(|| panic!("missing template {name}"));
724        let mut residue = Residue::new(id, None, name, Some(std), ResidueCategory::Standard);
725        residue.position = ResiduePosition::Internal;
726        for (atom_name, element, pos) in template.heavy_atoms() {
727            residue.add_atom(Atom::new(atom_name, element, pos));
728        }
729        residue
730    }
731
732    fn structure_with_residue(residue: Residue) -> Structure {
733        let mut chain = Chain::new("A");
734        chain.add_residue(residue);
735        let mut structure = Structure::new();
736        structure.add_chain(chain);
737        structure
738    }
739
740    fn structure_with_residues(residues: Vec<Residue>) -> Structure {
741        let mut chain = Chain::new("A");
742        for residue in residues {
743            chain.add_residue(residue);
744        }
745        let mut structure = Structure::new();
746        structure.add_chain(chain);
747        structure
748    }
749
750    fn n_terminal_residue(id: i32) -> Residue {
751        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
752        residue.position = ResiduePosition::NTerminal;
753        residue
754    }
755
756    fn c_terminal_residue(id: i32) -> Residue {
757        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
758        residue.position = ResiduePosition::CTerminal;
759        let c_pos = residue.atom("C").expect("C atom").pos;
760        let o_pos = residue.atom("O").expect("O atom").pos;
761        let offset = c_pos - o_pos;
762        let oxt_pos = c_pos + offset;
763        residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
764        residue
765    }
766
767    #[test]
768    fn titratable_templates_exist_in_database() {
769        let expected = [
770            "ASP", "ASH", "GLU", "GLH", "LYS", "LYN", "ARG", "ARN", "CYS", "CYM", "TYR", "TYM",
771            "HID", "HIE", "HIP",
772        ];
773
774        for name in expected {
775            assert!(
776                db::get_template(name).is_some(),
777                "template {name} should exist"
778            );
779        }
780    }
781
782    #[test]
783    fn determine_protonation_state_tracks_pka_thresholds() {
784        let structure =
785            structure_with_residue(residue_from_template("ASP", StandardResidue::ASP, 1));
786        let mut config = HydroConfig {
787            target_ph: Some(2.5),
788            ..HydroConfig::default()
789        };
790        assert_eq!(
791            determine_protonation_state(&structure, 0, 0, &config),
792            Some("ASH".to_string())
793        );
794
795        config.target_ph = Some(5.0);
796        assert_eq!(
797            determine_protonation_state(&structure, 0, 0, &config),
798            Some("ASP".to_string())
799        );
800
801        let structure =
802            structure_with_residue(residue_from_template("LYS", StandardResidue::LYS, 2));
803        config.target_ph = Some(11.0);
804        assert_eq!(
805            determine_protonation_state(&structure, 0, 0, &config),
806            Some("LYN".to_string())
807        );
808
809        config.target_ph = Some(7.0);
810        assert_eq!(
811            determine_protonation_state(&structure, 0, 0, &config),
812            Some("LYS".to_string())
813        );
814    }
815
816    #[test]
817    fn determine_protonation_state_respects_his_strategy() {
818        let mut residue = residue_from_template("HID", StandardResidue::HIS, 3);
819        residue.name = "HIS".to_string();
820        let structure = structure_with_residue(residue);
821
822        let config = HydroConfig {
823            target_ph: Some(7.0),
824            his_strategy: HisStrategy::DirectHIE,
825            ..HydroConfig::default()
826        };
827
828        assert_eq!(
829            determine_protonation_state(&structure, 0, 0, &config),
830            Some("HIE".to_string())
831        );
832
833        let mut acid_config = HydroConfig::default();
834        acid_config.target_ph = Some(5.5);
835        assert_eq!(
836            determine_protonation_state(&structure, 0, 0, &acid_config),
837            Some("HIP".to_string())
838        );
839    }
840
841    #[test]
842    fn n_terminal_defaults_to_protonated_without_ph() {
843        let residue = n_terminal_residue(40);
844        let mut structure = structure_with_residue(residue);
845
846        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
847
848        let residue = structure.find_residue("A", 40, None).unwrap();
849        assert!(residue.has_atom("H1"));
850        assert!(residue.has_atom("H2"));
851        assert!(residue.has_atom("H3"));
852    }
853
854    #[test]
855    fn n_terminal_deprotonates_above_pka() {
856        let residue = n_terminal_residue(41);
857        let mut structure = structure_with_residue(residue);
858        let mut config = HydroConfig::default();
859        config.target_ph = Some(9.0);
860
861        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
862
863        let residue = structure.find_residue("A", 41, None).unwrap();
864        assert!(residue.has_atom("H1"));
865        assert!(residue.has_atom("H2"));
866        assert!(!residue.has_atom("H3"));
867    }
868
869    #[test]
870    fn c_terminal_protonates_under_acidic_ph() {
871        let residue = c_terminal_residue(50);
872        let mut structure = structure_with_residue(residue);
873        let mut config = HydroConfig::default();
874        config.target_ph = Some(2.5);
875
876        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
877
878        let residue = structure.find_residue("A", 50, None).unwrap();
879        assert!(residue.has_atom("HOXT"));
880    }
881
882    #[test]
883    fn c_terminal_remains_deprotonated_at_physiological_ph() {
884        let residue = c_terminal_residue(51);
885        let mut structure = structure_with_residue(residue);
886
887        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
888
889        let residue = structure.find_residue("A", 51, None).unwrap();
890        assert!(!residue.has_atom("HOXT"));
891    }
892
893    #[test]
894    fn determine_protonation_state_skips_already_marked_cyx() {
895        let mut residue = residue_from_template("CYS", StandardResidue::CYS, 25);
896        residue.name = "CYX".to_string();
897        let structure = structure_with_residue(residue);
898        let mut config = HydroConfig::default();
899        config.target_ph = Some(9.0);
900
901        assert_eq!(determine_protonation_state(&structure, 0, 0, &config), None);
902    }
903
904    #[test]
905    fn add_hydrogens_populates_internal_lysine_side_chain() {
906        let mut residue = residue_from_template("LYS", StandardResidue::LYS, 10);
907        residue.add_atom(Atom::new("FAKE", Element::H, Point::origin()));
908        let mut structure = structure_with_residue(residue);
909
910        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
911
912        let residue = structure.find_residue("A", 10, None).unwrap();
913        assert!(residue.has_atom("HZ1"));
914        assert!(residue.has_atom("HZ2"));
915        assert!(residue.has_atom("HZ3"));
916        assert!(!residue.has_atom("FAKE"));
917    }
918
919    #[test]
920    fn add_hydrogens_relabels_asp_under_acidic_ph() {
921        let residue = residue_from_template("ASP", StandardResidue::ASP, 15);
922        let mut structure = structure_with_residue(residue);
923        let mut config = HydroConfig::default();
924        config.target_ph = Some(2.0);
925
926        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
927
928        let residue = structure.find_residue("A", 15, None).unwrap();
929        assert_eq!(residue.name, "ASH");
930        assert!(residue.has_atom("HD2"));
931    }
932
933    #[test]
934    fn construct_hydrogens_errors_when_anchor_missing() {
935        let template = db::get_template("ALA").expect("template ALA");
936        let mut residue = Residue::new(
937            20,
938            None,
939            "ALA",
940            Some(StandardResidue::ALA),
941            ResidueCategory::Standard,
942        );
943        residue.position = ResiduePosition::Internal;
944
945        let (name, element, pos) = template.heavy_atoms().next().unwrap();
946        residue.add_atom(Atom::new(name, element, pos));
947
948        let err = construct_hydrogens_for_residue(&mut residue, &HydroConfig::default())
949            .expect_err("should fail");
950        assert!(matches!(err, Error::IncompleteResidueForHydro { .. }));
951    }
952
953    #[test]
954    fn close_cysteines_are_relabelled_to_cyx_and_skip_hydrogens() {
955        let cys1 = residue_from_template("CYS", StandardResidue::CYS, 30);
956        let mut cys2 = residue_from_template("CYS", StandardResidue::CYS, 31);
957
958        let sg1 = cys1.atom("SG").expect("SG in cys1").pos;
959        let sg2 = cys2.atom("SG").expect("SG in cys2").pos;
960        let desired = sg1 + Vector3::new(0.5, 0.0, 0.0);
961        let offset = desired - sg2;
962        for atom in cys2.iter_atoms_mut() {
963            atom.translate_by(&offset);
964        }
965
966        let mut structure = structure_with_residues(vec![cys1, cys2]);
967        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation");
968
969        let res1 = structure.find_residue("A", 30, None).unwrap();
970        let res2 = structure.find_residue("A", 31, None).unwrap();
971        assert_eq!(res1.name, "CYX");
972        assert_eq!(res2.name, "CYX");
973        assert!(!res1.has_atom("HG"));
974        assert!(!res2.has_atom("HG"));
975    }
976}