bio_forge/ops/
hydro.rs

1//! Protonation-aware hydrogen placement for protein and nucleic acid structures.
2//!
3//! This module inspects residue templates, predicts protonation states from pH or hydrogen
4//! bonding networks, relabels residues accordingly, and rebuilds missing hydrogens while
5//! respecting polymer termini, nucleic acid priming, and disulfide bridges.
6
7use crate::db;
8use crate::model::{
9    atom::Atom,
10    residue::Residue,
11    structure::Structure,
12    types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
13};
14use crate::ops::error::Error;
15use nalgebra::{Matrix3, Rotation3, Vector3};
16use rand::Rng;
17use std::collections::HashSet;
18
19/// Maximum sulfur–sulfur distance (Å) used to detect disulfide bridges.
20const DISULFIDE_SG_THRESHOLD: f64 = 2.2;
21/// Henderson–Hasselbalch breakpoint for protonated N-termini.
22const N_TERM_PKA: f64 = 8.0;
23/// Henderson–Hasselbalch breakpoint for protonated C-termini.
24const C_TERM_PKA: f64 = 3.1;
25/// Henderson–Hasselbalch breakpoint for the second dissociation of terminal phosphate.
26const PHOSPHATE_PKA2: f64 = 6.5;
27
28/// Parameters controlling hydrogen addition behavior.
29///
30/// `HydroConfig` can target a specific solution pH, remove pre-existing hydrogens, and
31/// choose how neutral histidine tautomers are assigned.
32#[derive(Debug, Clone)]
33pub struct HydroConfig {
34    /// Optional solvent pH value used for titration decisions.
35    pub target_ph: Option<f64>,
36    /// Whether to strip all existing hydrogens before reconstruction.
37    pub remove_existing_h: bool,
38    /// Strategy for setting neutral histidine tautomer labels.
39    pub his_strategy: HisStrategy,
40}
41
42impl Default for HydroConfig {
43    /// Provides biologically reasonable defaults (physiological pH, removal of old hydrogens,
44    /// and hydrogen-bond-aware histidine selection).
45    fn default() -> Self {
46        Self {
47            target_ph: None,
48            remove_existing_h: true,
49            his_strategy: HisStrategy::HbNetwork,
50        }
51    }
52}
53
54/// Strategies for choosing neutral histidine labels.
55#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub enum HisStrategy {
57    /// Force all neutral histidines to HID (delta-protonated).
58    DirectHID,
59    /// Force all neutral histidines to HIE (epsilon-protonated).
60    DirectHIE,
61    /// Randomly choose between HID and HIE.
62    Random,
63    /// Analyze hydrogen-bond networks to select the tautomer most likely to bond.
64    HbNetwork,
65}
66
67/// Adds hydrogens to all standard residues in-place, updating protonation states when needed.
68///
69/// Disulfide bridges are detected prior to titration and preserved by relabeling cysteines to
70/// `CYX`. Residues lacking required anchor atoms will trigger descriptive errors so upstream
71/// cleanup steps can correct the issues.
72///
73/// # Arguments
74///
75/// * `structure` - Mutable structure whose residues will be protonated and hydrated.
76/// * `config` - Hydrogenation configuration, including pH target and histidine strategy.
77///
78/// # Returns
79///
80/// `Ok(())` when hydrogenation succeeds.
81///
82/// # Errors
83///
84/// Returns [`Error::MissingInternalTemplate`] when no template is found or
85/// [`Error::IncompleteResidueForHydro`] when required anchor atoms are missing.
86pub fn add_hydrogens(structure: &mut Structure, config: &HydroConfig) -> Result<(), Error> {
87    let mut targets = Vec::new();
88    for (c_idx, chain) in structure.iter_chains().enumerate() {
89        for (r_idx, residue) in chain.iter_residues().enumerate() {
90            if residue.category == ResidueCategory::Standard {
91                targets.push((c_idx, r_idx));
92            }
93        }
94    }
95
96    mark_disulfide_bridges(structure);
97
98    for (c_idx, r_idx) in targets {
99        let new_name = determine_protonation_state(structure, c_idx, r_idx, config);
100
101        let chain = structure.iter_chains_mut().nth(c_idx).unwrap();
102        let residue = chain.iter_residues_mut().nth(r_idx).unwrap();
103
104        if let Some(name) = new_name {
105            residue.name = name;
106        }
107
108        if config.remove_existing_h {
109            residue.strip_hydrogens();
110        }
111
112        construct_hydrogens_for_residue(residue, config)?;
113    }
114
115    Ok(())
116}
117
118/// Predicts the protonation-induced residue rename for a given polymer residue.
119///
120/// Applies residue-specific pKa thresholds, handles histidine tautomer selection, and
121/// respects previously tagged disulfide cysteines.
122///
123/// # Arguments
124///
125/// * `structure` - Structure providing context for hydrogen-bond analysis.
126/// * `c_idx` - Chain index of the residue under evaluation.
127/// * `r_idx` - Residue index within the chain.
128/// * `config` - Hydrogenation configuration containing pH and histidine options.
129///
130/// # Returns
131///
132/// `Some(new_name)` when the residue should be relabeled; otherwise `None`.
133fn determine_protonation_state(
134    structure: &Structure,
135    c_idx: usize,
136    r_idx: usize,
137    config: &HydroConfig,
138) -> Option<String> {
139    let chain = structure.iter_chains().nth(c_idx)?;
140    let residue = chain.iter_residues().nth(r_idx)?;
141    let std = residue.standard_name?;
142
143    if std == StandardResidue::CYS && residue.name == "CYX" {
144        return None;
145    }
146
147    if let Some(ph) = config.target_ph {
148        return match std {
149            StandardResidue::ASP => Some(if ph < 3.9 {
150                "ASH".to_string()
151            } else {
152                "ASP".to_string()
153            }),
154            StandardResidue::GLU => Some(if ph < 4.2 {
155                "GLH".to_string()
156            } else {
157                "GLU".to_string()
158            }),
159            StandardResidue::LYS => Some(if ph > 10.5 {
160                "LYN".to_string()
161            } else {
162                "LYS".to_string()
163            }),
164            StandardResidue::ARG => Some(if ph > 12.5 {
165                "ARN".to_string()
166            } else {
167                "ARG".to_string()
168            }),
169            StandardResidue::CYS => Some(if ph > 8.3 {
170                "CYM".to_string()
171            } else {
172                "CYS".to_string()
173            }),
174            StandardResidue::TYR => Some(if ph > 10.0 {
175                "TYM".to_string()
176            } else {
177                "TYR".to_string()
178            }),
179            StandardResidue::HIS => {
180                if ph < 6.0 {
181                    Some("HIP".to_string())
182                } else {
183                    Some(select_neutral_his(structure, residue, &config.his_strategy))
184                }
185            }
186            _ => None,
187        };
188    }
189
190    if std == StandardResidue::HIS && matches!(residue.name.as_str(), "HIS" | "HID" | "HIE" | "HIP")
191    {
192        return Some(select_neutral_his(structure, residue, &config.his_strategy));
193    }
194
195    None
196}
197
198/// Identifies cysteine pairs forming disulfide bonds and renames them to `CYX`.
199///
200/// # Arguments
201///
202/// * `structure` - Mutable structure containing residues to scan and relabel.
203fn mark_disulfide_bridges(structure: &mut Structure) {
204    let mut cys_sulfurs = Vec::new();
205    for (c_idx, chain) in structure.iter_chains().enumerate() {
206        for (r_idx, residue) in chain.iter_residues().enumerate() {
207            if let (true, Some(sg)) = (
208                matches!(residue.standard_name, Some(StandardResidue::CYS)),
209                residue.atom("SG"),
210            ) {
211                cys_sulfurs.push((c_idx, r_idx, sg.pos));
212            }
213        }
214    }
215
216    if cys_sulfurs.len() < 2 {
217        return;
218    }
219
220    let threshold_sq = DISULFIDE_SG_THRESHOLD * DISULFIDE_SG_THRESHOLD;
221    let mut disulfide_residues: HashSet<(usize, usize)> = HashSet::new();
222    for i in 0..cys_sulfurs.len() {
223        for j in (i + 1)..cys_sulfurs.len() {
224            let (ci, ri, pos_i) = &cys_sulfurs[i];
225            let (cj, rj, pos_j) = &cys_sulfurs[j];
226            if (*pos_i - *pos_j).norm_squared() <= threshold_sq {
227                disulfide_residues.insert((*ci, *ri));
228                disulfide_residues.insert((*cj, *rj));
229            }
230        }
231    }
232
233    if disulfide_residues.is_empty() {
234        return;
235    }
236
237    for (c_idx, chain) in structure.iter_chains_mut().enumerate() {
238        for (r_idx, residue) in chain.iter_residues_mut().enumerate() {
239            if disulfide_residues.contains(&(c_idx, r_idx)) && residue.name != "CYX" {
240                residue.name = "CYX".to_string();
241            }
242        }
243    }
244}
245
246/// Selects a neutral histidine tautomer using the configured strategy.
247///
248/// # Arguments
249///
250/// * `structure` - Structure used when hydrogen-bond networks are considered.
251/// * `residue` - Histidine residue being relabeled.
252/// * `strategy` - Selection strategy from [`HisStrategy`].
253///
254/// # Returns
255///
256/// Either `"HID"` or `"HIE"`.
257fn select_neutral_his(structure: &Structure, residue: &Residue, strategy: &HisStrategy) -> String {
258    match strategy {
259        HisStrategy::DirectHID => "HID".to_string(),
260        HisStrategy::DirectHIE => "HIE".to_string(),
261        HisStrategy::Random => {
262            let mut rng = rand::rng();
263            if rng.random_bool(0.5) {
264                "HID".to_string()
265            } else {
266                "HIE".to_string()
267            }
268        }
269        HisStrategy::HbNetwork => optimize_his_network(structure, residue),
270    }
271}
272
273/// Determines the best histidine tautomer by inspecting nearby hydrogen-bond acceptors.
274///
275/// # Arguments
276///
277/// * `structure` - Complete structure used to search for acceptor atoms.
278/// * `residue` - Histidine residue being evaluated.
279///
280/// # Returns
281///
282/// The histidine label (`"HID"` or `"HIE"`) that maximizes compatibility with neighbors.
283fn optimize_his_network(structure: &Structure, residue: &Residue) -> String {
284    let nd1 = residue.atom("ND1");
285    let ne2 = residue.atom("NE2");
286
287    let has_acceptor_near = |atom: &Atom| -> bool {
288        for chain in structure.iter_chains() {
289            for other_res in chain.iter_residues() {
290                if other_res.id == residue.id && chain.id == "Unknown" {
291                    continue;
292                }
293                if other_res == residue {
294                    continue;
295                }
296
297                for other_atom in other_res.iter_atoms() {
298                    if matches!(other_atom.element, Element::O | Element::N)
299                        && atom.distance_squared(other_atom) < 3.5 * 3.5
300                    {
301                        return true;
302                    }
303                }
304            }
305        }
306        false
307    };
308
309    let nd1_interaction = nd1.map(has_acceptor_near).unwrap_or(false);
310    let ne2_interaction = ne2.map(has_acceptor_near).unwrap_or(false);
311
312    match (nd1_interaction, ne2_interaction) {
313        (true, false) => "HID".to_string(),
314        (false, true) => "HIE".to_string(),
315        _ => "HID".to_string(),
316    }
317}
318
319/// Rebuilds hydrogens for a single residue using template geometry and terminal rules.
320///
321/// # Arguments
322///
323/// * `residue` - Residue to augment with hydrogens.
324/// * `config` - Hydrogenation configuration influencing terminal protonation.
325///
326/// # Returns
327///
328/// `Ok(())` when all hydrogens were added or already present.
329///
330/// # Errors
331///
332/// Returns [`Error::MissingInternalTemplate`] or [`Error::IncompleteResidueForHydro`] when
333/// required template data or anchor atoms are missing.
334fn construct_hydrogens_for_residue(
335    residue: &mut Residue,
336    config: &HydroConfig,
337) -> Result<(), Error> {
338    let template_name = residue.name.clone();
339
340    let template_view =
341        db::get_template(&template_name).ok_or_else(|| Error::MissingInternalTemplate {
342            res_name: template_name.clone(),
343        })?;
344
345    let existing_atoms: HashSet<String> = residue.atoms().iter().map(|a| a.name.clone()).collect();
346
347    for (h_name, h_tmpl_pos, anchors_iter) in template_view.hydrogens() {
348        if existing_atoms.contains(h_name) {
349            continue;
350        }
351
352        let anchors: Vec<&str> = anchors_iter.collect();
353        if let Ok(pos) = reconstruct_geometry(residue, h_tmpl_pos, &anchors) {
354            residue.add_atom(Atom::new(h_name, Element::H, pos));
355        } else {
356            return Err(Error::incomplete_for_hydro(
357                &residue.name,
358                residue.id,
359                anchors.first().copied().unwrap_or("?"),
360            ));
361        }
362    }
363
364    match residue.position {
365        ResiduePosition::NTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
366            construct_n_term_hydrogens(residue, n_term_should_be_protonated(config))?;
367        }
368        ResiduePosition::CTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
369            construct_c_term_hydrogen(residue, c_term_should_be_protonated(config))?;
370        }
371        ResiduePosition::ThreePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
372            construct_3_prime_hydrogen(residue)?;
373        }
374        ResiduePosition::FivePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
375            if residue.has_atom("P") {
376                construct_5_prime_phosphate_hydrogens(residue, config)?;
377            } else if residue.has_atom("O5'") {
378                construct_5_prime_hydrogen(residue)?;
379            }
380        }
381        _ => {}
382    }
383
384    Ok(())
385}
386
387/// Evaluates whether an N-terminus should remain protonated under the configured pH.
388///
389/// # Arguments
390///
391/// * `config` - Hydrogenation configuration that may provide a target pH.
392///
393/// # Returns
394///
395/// `true` when the pH is below the configured threshold or unspecified.
396fn n_term_should_be_protonated(config: &HydroConfig) -> bool {
397    config.target_ph.map(|ph| ph < N_TERM_PKA).unwrap_or(true)
398}
399
400/// Evaluates whether a C-terminus should remain protonated under the configured pH.
401///
402/// # Arguments
403///
404/// * `config` - Hydrogenation configuration potentially specifying pH.
405///
406/// # Returns
407///
408/// `true` when the pH is below the acidic cutoff; otherwise `false`.
409fn c_term_should_be_protonated(config: &HydroConfig) -> bool {
410    config.target_ph.map(|ph| ph < C_TERM_PKA).unwrap_or(false)
411}
412
413/// Aligns template coordinates to residue anchors to predict a hydrogen position.
414///
415/// # Arguments
416///
417/// * `residue` - Residue containing measured anchor atoms.
418/// * `target_tmpl_pos` - Target hydrogen position from the template.
419/// * `anchor_names` - Atom names used to determine the rigid-body transform.
420///
421/// # Returns
422///
423/// `Ok(point)` containing the placed coordinate or `Err(())` if anchors are missing.
424fn reconstruct_geometry(
425    residue: &Residue,
426    target_tmpl_pos: Point,
427    anchor_names: &[&str],
428) -> Result<Point, ()> {
429    let template_view = db::get_template(&residue.name).ok_or(())?;
430
431    let mut residue_pts = Vec::new();
432    let mut template_pts = Vec::new();
433
434    for name in anchor_names {
435        let r_atom = residue.atom(name).ok_or(())?;
436        residue_pts.push(r_atom.pos);
437
438        let t_pos = template_view
439            .heavy_atoms()
440            .find(|(n, _, _)| n == name)
441            .map(|(_, _, p)| p)
442            .ok_or(())?;
443        template_pts.push(t_pos);
444    }
445
446    let (rot, trans) = calculate_transform(&residue_pts, &template_pts).ok_or(())?;
447
448    Ok(rot * target_tmpl_pos + trans)
449}
450
451/// Rebuilds the N-terminal amine hydrogens using tetrahedral geometry.
452///
453/// # Arguments
454///
455/// * `residue` - Residue whose terminal hydrogens are being reconstructed.
456/// * `protonated` - Whether to place three hydrogens (`true`) or two (`false`).
457///
458/// # Returns
459///
460/// `Ok(())` when hydrogens are successfully placed.
461///
462/// # Errors
463///
464/// Returns [`Error::IncompleteResidueForHydro`] if the N or CA atoms are missing.
465fn construct_n_term_hydrogens(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
466    residue.remove_atom("H");
467    residue.remove_atom("H1");
468    residue.remove_atom("H2");
469    residue.remove_atom("H3");
470
471    let n_pos = residue
472        .atom("N")
473        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "N"))?
474        .pos;
475    let ca_pos = residue
476        .atom("CA")
477        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "CA"))?
478        .pos;
479
480    let v_ca_n = (n_pos - ca_pos).normalize();
481    let bond_len = 1.0;
482    let angle = 109.5_f64.to_radians();
483    let sin_a = angle.sin();
484    let cos_a = angle.cos();
485
486    let up = if v_ca_n.x.abs() < 0.9 {
487        Vector3::x()
488    } else {
489        Vector3::y()
490    };
491    let v_perp = v_ca_n.cross(&up).normalize();
492    let base_dir = v_ca_n.scale(cos_a) + v_perp.scale(sin_a);
493
494    let phases = [0.0_f64, 120.0, 240.0];
495    let mut candidates: Vec<Vector3<f64>> = phases
496        .iter()
497        .map(|deg| {
498            let rot_axis = Rotation3::from_axis_angle(
499                &nalgebra::Unit::new_normalize(v_ca_n),
500                deg.to_radians(),
501            );
502            rot_axis * base_dir
503        })
504        .collect();
505
506    candidates.sort_by(|a, b| {
507        a.dot(&v_ca_n)
508            .partial_cmp(&b.dot(&v_ca_n))
509            .unwrap_or(std::cmp::Ordering::Equal)
510    });
511
512    let target_count = if protonated { 3 } else { 2 };
513    let names = ["H1", "H2", "H3"];
514    for (idx, dir) in candidates.into_iter().take(target_count).enumerate() {
515        residue.add_atom(Atom::new(
516            names[idx],
517            Element::H,
518            n_pos + dir.scale(bond_len),
519        ));
520    }
521
522    Ok(())
523}
524
525/// Rebuilds the carboxylate proton at the C-terminus when protonated.
526///
527/// # Arguments
528///
529/// * `residue` - Residue to modify.
530/// * `protonated` - Whether the terminus should include the `HOXT` hydrogen.
531///
532/// # Returns
533///
534/// `Ok(())` after either removing or adding hydrogens as needed.
535///
536/// # Errors
537///
538/// Returns [`Error::IncompleteResidueForHydro`] if the `C` or `OXT` atoms are missing.
539fn construct_c_term_hydrogen(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
540    if !protonated {
541        residue.remove_atom("HOXT");
542        residue.remove_atom("HXT");
543        return Ok(());
544    }
545
546    residue.remove_atom("HXT");
547    if residue.has_atom("HOXT") {
548        return Ok(());
549    }
550
551    let c_pos = residue
552        .atom("C")
553        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C"))?
554        .pos;
555    let oxt_pos = residue
556        .atom("OXT")
557        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "OXT"))?
558        .pos;
559
560    let direction = oxt_pos - c_pos;
561    if direction.norm_squared() < 1e-6 {
562        return Err(Error::incomplete_for_hydro(
563            &residue.name,
564            residue.id,
565            "OXT",
566        ));
567    }
568    let dir = direction.normalize();
569    let h_pos = oxt_pos + dir.scale(0.97);
570
571    residue.add_atom(Atom::new("HOXT", Element::H, h_pos));
572    Ok(())
573}
574
575/// Adds the 3'-terminal hydroxyl hydrogen to nucleic acid residues when absent.
576///
577/// # Arguments
578///
579/// * `residue` - Residue representing a nucleic acid terminal unit.
580///
581/// # Returns
582///
583/// `Ok(())` when the hydrogen is added or already present.
584///
585/// # Errors
586///
587/// Returns [`Error::IncompleteResidueForHydro`] if required sugar atoms are missing.
588fn construct_3_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
589    if residue.has_atom("HO3'") {
590        return Ok(());
591    }
592
593    let o3 = residue
594        .atom("O3'")
595        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O3'"))?
596        .pos;
597    let c3 = residue
598        .atom("C3'")
599        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C3'"))?
600        .pos;
601    let c4 = residue
602        .atom("C4'")
603        .or_else(|| residue.atom("C2'"))
604        .map(|a| a.pos)
605        .unwrap_or_else(|| c3 + Vector3::x());
606
607    let v_c3_o3 = (o3 - c3).normalize();
608
609    let v_c4_c3 = (c3 - c4).normalize();
610    let normal = v_c3_o3.cross(&v_c4_c3).normalize();
611
612    let h_dir = (v_c3_o3 + normal).normalize();
613    let h_pos = o3 + h_dir.scale(0.96);
614
615    residue.add_atom(Atom::new("HO3'", Element::H, h_pos));
616    Ok(())
617}
618
619/// Adds the 5'-terminal hydroxyl hydrogen for nucleic acid residues lacking phosphates.
620///
621/// # Arguments
622///
623/// * `residue` - Residue whose 5' terminus is being hydrated.
624///
625/// # Returns
626///
627/// `Ok(())` when the hydrogen is added or already exists.
628///
629/// # Errors
630///
631/// Returns [`Error::IncompleteResidueForHydro`] if sugar anchor atoms are missing.
632fn construct_5_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
633    if residue.has_atom("HO5'") {
634        return Ok(());
635    }
636
637    let o5 = residue
638        .atom("O5'")
639        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "O5'"))?
640        .pos;
641    let c5 = residue
642        .atom("C5'")
643        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "C5'"))?
644        .pos;
645
646    let v_c5_o5 = (o5 - c5).normalize();
647    let aux = if v_c5_o5.z.abs() < 0.9 {
648        Vector3::z()
649    } else {
650        Vector3::x()
651    };
652    let perp = v_c5_o5.cross(&aux).normalize();
653
654    let h_dir = (v_c5_o5 + perp).normalize();
655    let h_pos = o5 + h_dir.scale(0.96);
656
657    residue.add_atom(Atom::new("HO5'", Element::H, h_pos));
658    Ok(())
659}
660
661/// Adds hydrogens to 5'-terminal phosphate groups based on pH.
662///
663/// At physiological pH (≥6.5), the terminal phosphate carries two negative charges and
664/// requires no protons. Below this threshold, one proton is added to OP3. The function
665/// respects existing hydrogens when `remove_existing_h` is disabled.
666///
667/// # Arguments
668///
669/// * `residue` - Nucleic acid residue with a 5'-terminal phosphate.
670/// * `config` - Hydrogenation configuration containing pH settings.
671///
672/// # Returns
673///
674/// `Ok(())` when hydrogens are appropriately placed or removed.
675///
676/// # Errors
677///
678/// Returns [`Error::IncompleteResidueForHydro`] if phosphate atoms are missing.
679fn construct_5_prime_phosphate_hydrogens(
680    residue: &mut Residue,
681    config: &HydroConfig,
682) -> Result<(), Error> {
683    let ph = config.target_ph.unwrap_or(7.4);
684
685    if ph >= PHOSPHATE_PKA2 {
686        residue.remove_atom("HOP3");
687        residue.remove_atom("HOP2");
688        return Ok(());
689    }
690
691    if residue.has_atom("HOP3") {
692        return Ok(());
693    }
694
695    let op3 = residue
696        .atom("OP3")
697        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "OP3"))?
698        .pos;
699    let p = residue
700        .atom("P")
701        .ok_or_else(|| Error::incomplete_for_hydro(&residue.name, residue.id, "P"))?
702        .pos;
703
704    let direction = (op3 - p).normalize();
705    let h_pos = op3 + direction * 0.96;
706
707    residue.add_atom(Atom::new("HOP3", Element::H, h_pos));
708    Ok(())
709}
710
711/// Computes the optimal rigid transform mapping template anchor points to residue atoms.
712///
713/// Uses Kabsch alignment with safeguards for one- and two-point configurations.
714///
715/// # Arguments
716///
717/// * `r_pts` - Coordinates from the residue.
718/// * `t_pts` - Corresponding coordinates from the template.
719///
720/// # Returns
721///
722/// `Some((rotation, translation))` when alignment succeeds; otherwise `None`.
723fn calculate_transform(r_pts: &[Point], t_pts: &[Point]) -> Option<(Matrix3<f64>, Vector3<f64>)> {
724    let n = r_pts.len();
725    if n != t_pts.len() || n == 0 {
726        return None;
727    }
728
729    let r_center = r_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
730    let t_center = t_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
731
732    if n == 1 {
733        return Some((Matrix3::identity(), r_center - t_center));
734    }
735
736    if n == 2 {
737        let v_r = r_pts[1] - r_pts[0];
738        let v_t = t_pts[1] - t_pts[0];
739        let rot = Rotation3::rotation_between(&v_t, &v_r).unwrap_or_else(Rotation3::identity);
740        let trans = r_center - rot * t_center;
741        return Some((rot.into_inner(), trans));
742    }
743
744    let mut cov = Matrix3::zeros();
745    for (p_r, p_t) in r_pts.iter().zip(t_pts.iter()) {
746        let v_r = p_r.coords - r_center;
747        let v_t = p_t.coords - t_center;
748        cov += v_r * v_t.transpose();
749    }
750
751    let svd = cov.svd(true, true);
752    let u = svd.u?;
753    let v_t = svd.v_t?;
754
755    let mut rot = u * v_t;
756    if rot.determinant() < 0.0 {
757        let mut corr = Matrix3::identity();
758        corr[(2, 2)] = -1.0;
759        rot = u * corr * v_t;
760    }
761
762    let trans = r_center - rot * t_center;
763    Some((rot, trans))
764}
765
766#[cfg(test)]
767mod tests {
768    use super::*;
769    use crate::model::{
770        atom::Atom,
771        chain::Chain,
772        residue::Residue,
773        types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
774    };
775
776    fn residue_from_template(name: &str, std: StandardResidue, id: i32) -> Residue {
777        let template = db::get_template(name).unwrap_or_else(|| panic!("missing template {name}"));
778        let mut residue = Residue::new(id, None, name, Some(std), ResidueCategory::Standard);
779        residue.position = ResiduePosition::Internal;
780        for (atom_name, element, pos) in template.heavy_atoms() {
781            residue.add_atom(Atom::new(atom_name, element, pos));
782        }
783        residue
784    }
785
786    fn structure_with_residue(residue: Residue) -> Structure {
787        let mut chain = Chain::new("A");
788        chain.add_residue(residue);
789        let mut structure = Structure::new();
790        structure.add_chain(chain);
791        structure
792    }
793
794    fn structure_with_residues(residues: Vec<Residue>) -> Structure {
795        let mut chain = Chain::new("A");
796        for residue in residues {
797            chain.add_residue(residue);
798        }
799        let mut structure = Structure::new();
800        structure.add_chain(chain);
801        structure
802    }
803
804    fn n_terminal_residue(id: i32) -> Residue {
805        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
806        residue.position = ResiduePosition::NTerminal;
807        residue
808    }
809
810    fn c_terminal_residue(id: i32) -> Residue {
811        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
812        residue.position = ResiduePosition::CTerminal;
813        let c_pos = residue.atom("C").expect("C atom").pos;
814        let o_pos = residue.atom("O").expect("O atom").pos;
815        let offset = c_pos - o_pos;
816        let oxt_pos = c_pos + offset;
817        residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
818        residue
819    }
820
821    fn five_prime_residue_with_phosphate(id: i32) -> Residue {
822        let template = db::get_template("DA").unwrap();
823        let mut residue = Residue::new(
824            id,
825            None,
826            "DA",
827            Some(StandardResidue::DA),
828            ResidueCategory::Standard,
829        );
830        residue.position = ResiduePosition::FivePrime;
831        for (atom_name, element, pos) in template.heavy_atoms() {
832            residue.add_atom(Atom::new(atom_name, element, pos));
833        }
834        let p_pos = residue.atom("P").unwrap().pos;
835        let op1_pos = residue.atom("OP1").unwrap().pos;
836        let op2_pos = residue.atom("OP2").unwrap().pos;
837        let o5_pos = residue.atom("O5'").unwrap().pos;
838        let centroid = (op1_pos.coords + op2_pos.coords + o5_pos.coords) / 3.0;
839        let direction = (p_pos.coords - centroid).normalize();
840        let op3_pos = p_pos + direction * 1.48;
841        residue.add_atom(Atom::new("OP3", Element::O, op3_pos));
842        residue
843    }
844
845    fn five_prime_residue_without_phosphate(id: i32) -> Residue {
846        let template = db::get_template("DA").unwrap();
847        let mut residue = Residue::new(
848            id,
849            None,
850            "DA",
851            Some(StandardResidue::DA),
852            ResidueCategory::Standard,
853        );
854        residue.position = ResiduePosition::FivePrime;
855        for (atom_name, element, pos) in template.heavy_atoms() {
856            if !matches!(atom_name, "P" | "OP1" | "OP2") {
857                residue.add_atom(Atom::new(atom_name, element, pos));
858            }
859        }
860        residue
861    }
862
863    fn three_prime_residue(id: i32) -> Residue {
864        let template = db::get_template("DA").unwrap();
865        let mut residue = Residue::new(
866            id,
867            None,
868            "DA",
869            Some(StandardResidue::DA),
870            ResidueCategory::Standard,
871        );
872        residue.position = ResiduePosition::ThreePrime;
873        for (atom_name, element, pos) in template.heavy_atoms() {
874            residue.add_atom(Atom::new(atom_name, element, pos));
875        }
876        residue
877    }
878
879    #[test]
880    fn titratable_templates_exist_in_database() {
881        let expected = [
882            "ASP", "ASH", "GLU", "GLH", "LYS", "LYN", "ARG", "ARN", "CYS", "CYM", "TYR", "TYM",
883            "HID", "HIE", "HIP",
884        ];
885
886        for name in expected {
887            assert!(
888                db::get_template(name).is_some(),
889                "template {name} should exist"
890            );
891        }
892    }
893
894    #[test]
895    fn determine_protonation_state_tracks_pka_thresholds() {
896        let structure =
897            structure_with_residue(residue_from_template("ASP", StandardResidue::ASP, 1));
898        let mut config = HydroConfig {
899            target_ph: Some(2.5),
900            ..HydroConfig::default()
901        };
902        assert_eq!(
903            determine_protonation_state(&structure, 0, 0, &config),
904            Some("ASH".to_string())
905        );
906
907        config.target_ph = Some(5.0);
908        assert_eq!(
909            determine_protonation_state(&structure, 0, 0, &config),
910            Some("ASP".to_string())
911        );
912
913        let structure =
914            structure_with_residue(residue_from_template("LYS", StandardResidue::LYS, 2));
915        config.target_ph = Some(11.0);
916        assert_eq!(
917            determine_protonation_state(&structure, 0, 0, &config),
918            Some("LYN".to_string())
919        );
920
921        config.target_ph = Some(7.0);
922        assert_eq!(
923            determine_protonation_state(&structure, 0, 0, &config),
924            Some("LYS".to_string())
925        );
926    }
927
928    #[test]
929    fn determine_protonation_state_respects_his_strategy() {
930        let mut residue = residue_from_template("HID", StandardResidue::HIS, 3);
931        residue.name = "HIS".to_string();
932        let structure = structure_with_residue(residue);
933
934        let config = HydroConfig {
935            target_ph: Some(7.0),
936            his_strategy: HisStrategy::DirectHIE,
937            ..HydroConfig::default()
938        };
939
940        assert_eq!(
941            determine_protonation_state(&structure, 0, 0, &config),
942            Some("HIE".to_string())
943        );
944
945        let mut acid_config = HydroConfig::default();
946        acid_config.target_ph = Some(5.5);
947        assert_eq!(
948            determine_protonation_state(&structure, 0, 0, &acid_config),
949            Some("HIP".to_string())
950        );
951    }
952
953    #[test]
954    fn n_terminal_defaults_to_protonated_without_ph() {
955        let residue = n_terminal_residue(40);
956        let mut structure = structure_with_residue(residue);
957
958        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
959
960        let residue = structure.find_residue("A", 40, None).unwrap();
961        assert!(residue.has_atom("H1"));
962        assert!(residue.has_atom("H2"));
963        assert!(residue.has_atom("H3"));
964    }
965
966    #[test]
967    fn n_terminal_deprotonates_above_pka() {
968        let residue = n_terminal_residue(41);
969        let mut structure = structure_with_residue(residue);
970        let mut config = HydroConfig::default();
971        config.target_ph = Some(9.0);
972
973        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
974
975        let residue = structure.find_residue("A", 41, None).unwrap();
976        assert!(residue.has_atom("H1"));
977        assert!(residue.has_atom("H2"));
978        assert!(!residue.has_atom("H3"));
979    }
980
981    #[test]
982    fn c_terminal_protonates_under_acidic_ph() {
983        let residue = c_terminal_residue(50);
984        let mut structure = structure_with_residue(residue);
985        let mut config = HydroConfig::default();
986        config.target_ph = Some(2.5);
987
988        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
989
990        let residue = structure.find_residue("A", 50, None).unwrap();
991        assert!(residue.has_atom("HOXT"));
992    }
993
994    #[test]
995    fn c_terminal_remains_deprotonated_at_physiological_ph() {
996        let residue = c_terminal_residue(51);
997        let mut structure = structure_with_residue(residue);
998
999        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1000
1001        let residue = structure.find_residue("A", 51, None).unwrap();
1002        assert!(!residue.has_atom("HOXT"));
1003    }
1004
1005    #[test]
1006    fn determine_protonation_state_skips_already_marked_cyx() {
1007        let mut residue = residue_from_template("CYS", StandardResidue::CYS, 25);
1008        residue.name = "CYX".to_string();
1009        let structure = structure_with_residue(residue);
1010        let mut config = HydroConfig::default();
1011        config.target_ph = Some(9.0);
1012
1013        assert_eq!(determine_protonation_state(&structure, 0, 0, &config), None);
1014    }
1015
1016    #[test]
1017    fn add_hydrogens_populates_internal_lysine_side_chain() {
1018        let mut residue = residue_from_template("LYS", StandardResidue::LYS, 10);
1019        residue.add_atom(Atom::new("FAKE", Element::H, Point::origin()));
1020        let mut structure = structure_with_residue(residue);
1021
1022        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1023
1024        let residue = structure.find_residue("A", 10, None).unwrap();
1025        assert!(residue.has_atom("HZ1"));
1026        assert!(residue.has_atom("HZ2"));
1027        assert!(residue.has_atom("HZ3"));
1028        assert!(!residue.has_atom("FAKE"));
1029    }
1030
1031    #[test]
1032    fn add_hydrogens_relabels_asp_under_acidic_ph() {
1033        let residue = residue_from_template("ASP", StandardResidue::ASP, 15);
1034        let mut structure = structure_with_residue(residue);
1035        let mut config = HydroConfig::default();
1036        config.target_ph = Some(2.0);
1037
1038        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1039
1040        let residue = structure.find_residue("A", 15, None).unwrap();
1041        assert_eq!(residue.name, "ASH");
1042        assert!(residue.has_atom("HD2"));
1043    }
1044
1045    #[test]
1046    fn construct_hydrogens_errors_when_anchor_missing() {
1047        let template = db::get_template("ALA").expect("template ALA");
1048        let mut residue = Residue::new(
1049            20,
1050            None,
1051            "ALA",
1052            Some(StandardResidue::ALA),
1053            ResidueCategory::Standard,
1054        );
1055        residue.position = ResiduePosition::Internal;
1056
1057        let (name, element, pos) = template.heavy_atoms().next().unwrap();
1058        residue.add_atom(Atom::new(name, element, pos));
1059
1060        let err = construct_hydrogens_for_residue(&mut residue, &HydroConfig::default())
1061            .expect_err("should fail");
1062        assert!(matches!(err, Error::IncompleteResidueForHydro { .. }));
1063    }
1064
1065    #[test]
1066    fn close_cysteines_are_relabelled_to_cyx_and_skip_hydrogens() {
1067        let cys1 = residue_from_template("CYS", StandardResidue::CYS, 30);
1068        let mut cys2 = residue_from_template("CYS", StandardResidue::CYS, 31);
1069
1070        let sg1 = cys1.atom("SG").expect("SG in cys1").pos;
1071        let sg2 = cys2.atom("SG").expect("SG in cys2").pos;
1072        let desired = sg1 + Vector3::new(0.5, 0.0, 0.0);
1073        let offset = desired - sg2;
1074        for atom in cys2.iter_atoms_mut() {
1075            atom.translate_by(&offset);
1076        }
1077
1078        let mut structure = structure_with_residues(vec![cys1, cys2]);
1079        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation");
1080
1081        let res1 = structure.find_residue("A", 30, None).unwrap();
1082        let res2 = structure.find_residue("A", 31, None).unwrap();
1083        assert_eq!(res1.name, "CYX");
1084        assert_eq!(res2.name, "CYX");
1085        assert!(!res1.has_atom("HG"));
1086        assert!(!res2.has_atom("HG"));
1087    }
1088
1089    #[test]
1090    fn five_prime_phosphate_deprotonated_at_physiological_ph() {
1091        let residue = five_prime_residue_with_phosphate(60);
1092        let mut structure = structure_with_residue(residue);
1093
1094        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1095
1096        let residue = structure.find_residue("A", 60, None).unwrap();
1097        assert!(residue.has_atom("OP3"), "OP3 should remain");
1098        assert!(
1099            !residue.has_atom("HOP3"),
1100            "HOP3 should not exist at neutral pH"
1101        );
1102        assert!(
1103            !residue.has_atom("HOP2"),
1104            "HOP2 should not exist at neutral pH"
1105        );
1106    }
1107
1108    #[test]
1109    fn five_prime_phosphate_protonated_below_pka() {
1110        let residue = five_prime_residue_with_phosphate(61);
1111        let mut structure = structure_with_residue(residue);
1112        let mut config = HydroConfig::default();
1113        config.target_ph = Some(5.5);
1114
1115        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1116
1117        let residue = structure.find_residue("A", 61, None).unwrap();
1118        assert!(residue.has_atom("OP3"), "OP3 should remain");
1119        assert!(residue.has_atom("HOP3"), "HOP3 should be added below pKa");
1120    }
1121
1122    #[test]
1123    fn five_prime_without_phosphate_gets_ho5() {
1124        let residue = five_prime_residue_without_phosphate(62);
1125        let mut structure = structure_with_residue(residue);
1126
1127        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1128
1129        let residue = structure.find_residue("A", 62, None).unwrap();
1130        assert!(
1131            residue.has_atom("HO5'"),
1132            "HO5' should be added for 5'-OH terminus"
1133        );
1134        assert!(!residue.has_atom("P"), "phosphorus should not exist");
1135    }
1136
1137    #[test]
1138    fn three_prime_nucleic_gets_ho3() {
1139        let residue = three_prime_residue(70);
1140        let mut structure = structure_with_residue(residue);
1141
1142        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1143
1144        let residue = structure.find_residue("A", 70, None).unwrap();
1145        assert!(
1146            residue.has_atom("HO3'"),
1147            "HO3' should be added for 3' terminal"
1148        );
1149    }
1150}