bio_forge/ops/
hydro.rs

1//! Protonation-aware hydrogen placement for protein and nucleic acid structures.
2//!
3//! This module inspects residue templates, predicts protonation states from pH or hydrogen
4//! bonding networks, relabels residues accordingly, and rebuilds missing hydrogens while
5//! respecting polymer termini, nucleic acid priming, and disulfide bridges.
6
7use crate::db;
8use crate::model::{
9    atom::Atom,
10    grid::Grid,
11    residue::Residue,
12    structure::Structure,
13    types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
14};
15use crate::ops::error::Error;
16use crate::utils::parallel::*;
17use nalgebra::{Matrix3, Rotation3, Vector3};
18use rand::Rng;
19use std::collections::HashSet;
20
21/// Maximum sulfur–sulfur distance (Å) used to detect disulfide bridges.
22const DISULFIDE_SG_THRESHOLD: f64 = 2.2;
23/// Henderson–Hasselbalch breakpoint for protonated N-termini.
24const N_TERM_PKA: f64 = 8.0;
25/// Henderson–Hasselbalch breakpoint for protonated C-termini.
26const C_TERM_PKA: f64 = 3.1;
27/// Henderson–Hasselbalch breakpoint for the second dissociation of terminal phosphate.
28const PHOSPHATE_PKA2: f64 = 6.5;
29
30/// Parameters controlling hydrogen addition behavior.
31///
32/// `HydroConfig` can target a specific solution pH, remove pre-existing hydrogens, and
33/// choose how neutral histidine tautomers are assigned.
34#[derive(Debug, Clone)]
35pub struct HydroConfig {
36    /// Optional solvent pH value used for titration decisions.
37    pub target_ph: Option<f64>,
38    /// Whether to strip all existing hydrogens before reconstruction.
39    pub remove_existing_h: bool,
40    /// Strategy for setting neutral histidine tautomer labels.
41    pub his_strategy: HisStrategy,
42}
43
44impl Default for HydroConfig {
45    /// Provides biologically reasonable defaults (physiological pH, removal of old hydrogens,
46    /// and hydrogen-bond-aware histidine selection).
47    fn default() -> Self {
48        Self {
49            target_ph: None,
50            remove_existing_h: true,
51            his_strategy: HisStrategy::HbNetwork,
52        }
53    }
54}
55
56/// Strategies for choosing neutral histidine labels.
57#[derive(Debug, Clone, Copy, PartialEq, Eq)]
58pub enum HisStrategy {
59    /// Force all neutral histidines to HID (delta-protonated).
60    DirectHID,
61    /// Force all neutral histidines to HIE (epsilon-protonated).
62    DirectHIE,
63    /// Randomly choose between HID and HIE.
64    Random,
65    /// Analyze hydrogen-bond networks to select the tautomer most likely to bond.
66    HbNetwork,
67}
68
69/// Adds hydrogens to all standard residues in-place, updating protonation states when needed.
70///
71/// Disulfide bridges are detected prior to titration and preserved by relabeling cysteines to
72/// `CYX`. Residues lacking required anchor atoms will trigger descriptive errors so upstream
73/// cleanup steps can correct the issues.
74///
75/// # Arguments
76///
77/// * `structure` - Mutable structure whose residues will be protonated and hydrated.
78/// * `config` - Hydrogenation configuration, including pH target and histidine strategy.
79///
80/// # Returns
81///
82/// `Ok(())` when hydrogenation succeeds.
83///
84/// # Errors
85///
86/// Returns [`Error::MissingInternalTemplate`] when no template is found or
87/// [`Error::IncompleteResidueForHydro`] when required anchor atoms are missing.
88pub fn add_hydrogens(structure: &mut Structure, config: &HydroConfig) -> Result<(), Error> {
89    mark_disulfide_bridges(structure);
90
91    let acceptor_grid = if config.his_strategy == HisStrategy::HbNetwork {
92        Some(build_acceptor_grid(structure))
93    } else {
94        None
95    };
96
97    structure
98        .par_chains_mut()
99        .enumerate()
100        .try_for_each(|(c_idx, chain)| {
101            chain
102                .par_residues_mut()
103                .enumerate()
104                .try_for_each(|(r_idx, residue)| {
105                    if residue.category != ResidueCategory::Standard {
106                        return Ok(());
107                    }
108
109                    let new_name = determine_protonation_state(
110                        residue,
111                        config,
112                        acceptor_grid.as_ref(),
113                        Some((c_idx, r_idx)),
114                    );
115
116                    if let Some(name) = new_name {
117                        residue.name = name.into();
118                    }
119
120                    if config.remove_existing_h {
121                        residue.strip_hydrogens();
122                    }
123
124                    construct_hydrogens_for_residue(residue, config)
125                })
126        })
127}
128
129/// Builds a spatial grid of all nitrogen and oxygen atoms in the structure.
130///
131/// # Arguments
132///
133/// * `structure` - Structure from which to extract acceptor atoms.
134///
135/// # Returns
136///
137/// A `Grid` containing positions and residue indices of all N, O, and F atoms.
138fn build_acceptor_grid(structure: &Structure) -> Grid<(usize, usize)> {
139    let atoms: Vec<(Point, (usize, usize))> = structure
140        .iter_chains()
141        .enumerate()
142        .flat_map(|(c_idx, chain)| {
143            chain
144                .iter_residues()
145                .enumerate()
146                .flat_map(move |(r_idx, residue)| {
147                    residue
148                        .iter_atoms()
149                        .filter(|a| matches!(a.element, Element::N | Element::O | Element::F))
150                        .map(move |a| (a.pos, (c_idx, r_idx)))
151                })
152        })
153        .collect();
154    Grid::new(atoms, 3.5)
155}
156
157/// Predicts the protonation-induced residue rename for a given polymer residue.
158///
159/// Applies residue-specific pKa thresholds, handles histidine tautomer selection, and
160/// respects previously tagged disulfide cysteines.
161///
162/// # Arguments
163///
164/// * `residue` - Residue under evaluation.
165/// * `config` - Hydrogenation configuration containing pH and histidine options.
166/// * `grid` - Optional grid of acceptor atoms for HIS network analysis.
167/// * `indices` - Optional (chain_idx, res_idx) to exclude self-interactions.
168///
169/// # Returns
170///
171/// `Some(new_name)` when the residue should be relabeled; otherwise `None`.
172fn determine_protonation_state(
173    residue: &Residue,
174    config: &HydroConfig,
175    grid: Option<&Grid<(usize, usize)>>,
176    indices: Option<(usize, usize)>,
177) -> Option<String> {
178    let std = residue.standard_name?;
179
180    if std == StandardResidue::CYS && residue.name == "CYX" {
181        return None;
182    }
183
184    if let Some(ph) = config.target_ph {
185        return match std {
186            StandardResidue::ASP => Some(if ph < 3.9 {
187                "ASH".to_string()
188            } else {
189                "ASP".to_string()
190            }),
191            StandardResidue::GLU => Some(if ph < 4.2 {
192                "GLH".to_string()
193            } else {
194                "GLU".to_string()
195            }),
196            StandardResidue::LYS => Some(if ph > 10.5 {
197                "LYN".to_string()
198            } else {
199                "LYS".to_string()
200            }),
201            StandardResidue::ARG => Some(if ph > 12.5 {
202                "ARN".to_string()
203            } else {
204                "ARG".to_string()
205            }),
206            StandardResidue::CYS => Some(if ph > 8.3 {
207                "CYM".to_string()
208            } else {
209                "CYS".to_string()
210            }),
211            StandardResidue::TYR => Some(if ph > 10.0 {
212                "TYM".to_string()
213            } else {
214                "TYR".to_string()
215            }),
216            StandardResidue::HIS => {
217                if ph < 6.0 {
218                    Some("HIP".to_string())
219                } else {
220                    Some(select_neutral_his(
221                        residue,
222                        config.his_strategy,
223                        grid,
224                        indices,
225                    ))
226                }
227            }
228            _ => None,
229        };
230    }
231
232    if std == StandardResidue::HIS && matches!(residue.name.as_str(), "HIS" | "HID" | "HIE" | "HIP")
233    {
234        return Some(select_neutral_his(
235            residue,
236            config.his_strategy,
237            grid,
238            indices,
239        ));
240    }
241
242    None
243}
244
245/// Identifies cysteine pairs forming disulfide bonds and renames them to `CYX`.
246///
247/// # Arguments
248///
249/// * `structure` - Mutable structure containing residues to scan and relabel.
250fn mark_disulfide_bridges(structure: &mut Structure) {
251    let cys_sulfurs: Vec<(Point, (usize, usize))> = structure
252        .par_chains_mut()
253        .enumerate()
254        .flat_map(|(c_idx, chain)| {
255            chain
256                .par_residues_mut()
257                .enumerate()
258                .filter_map(move |(r_idx, residue)| {
259                    if matches!(residue.standard_name, Some(StandardResidue::CYS)) {
260                        residue.atom("SG").map(|sg| (sg.pos, (c_idx, r_idx)))
261                    } else {
262                        None
263                    }
264                })
265        })
266        .collect();
267
268    if cys_sulfurs.len() < 2 {
269        return;
270    }
271
272    let grid = Grid::new(cys_sulfurs.clone(), DISULFIDE_SG_THRESHOLD + 0.5);
273
274    let disulfide_residues: HashSet<(usize, usize)> = cys_sulfurs
275        .par_iter()
276        .flat_map_iter(|(pos, (c_idx, r_idx))| {
277            grid.neighbors(pos, DISULFIDE_SG_THRESHOLD)
278                .exact()
279                .filter_map(move |(_, &(neighbor_c, neighbor_r))| {
280                    if *c_idx == neighbor_c && *r_idx == neighbor_r {
281                        None
282                    } else {
283                        Some([(*c_idx, *r_idx), (neighbor_c, neighbor_r)])
284                    }
285                })
286        })
287        .flatten()
288        .collect();
289
290    if disulfide_residues.is_empty() {
291        return;
292    }
293
294    structure
295        .par_chains_mut()
296        .enumerate()
297        .for_each(|(c_idx, chain)| {
298            chain
299                .par_residues_mut()
300                .enumerate()
301                .for_each(|(r_idx, residue)| {
302                    if disulfide_residues.contains(&(c_idx, r_idx)) && residue.name != "CYX" {
303                        residue.name = "CYX".into();
304                    }
305                });
306        });
307}
308
309/// Selects a neutral histidine tautomer using the configured strategy.
310///
311/// # Arguments
312///
313/// * `residue` - Histidine residue being relabeled.
314/// * `strategy` - Selection strategy from [`HisStrategy`].
315/// * `grid` - Optional grid of acceptor atoms for HIS network analysis.
316/// * `indices` - Optional (chain_idx, res_idx) to exclude self-interactions.
317///
318/// # Returns
319///
320/// Either `"HID"` or `"HIE"`.
321fn select_neutral_his(
322    residue: &Residue,
323    strategy: HisStrategy,
324    grid: Option<&Grid<(usize, usize)>>,
325    indices: Option<(usize, usize)>,
326) -> String {
327    match strategy {
328        HisStrategy::DirectHID => "HID".to_string(),
329        HisStrategy::DirectHIE => "HIE".to_string(),
330        HisStrategy::Random => {
331            let mut rng = rand::rng();
332            if rng.random_bool(0.5) {
333                "HID".to_string()
334            } else {
335                "HIE".to_string()
336            }
337        }
338        HisStrategy::HbNetwork => optimize_his_network(residue, grid, indices),
339    }
340}
341
342/// Determines the best histidine tautomer by inspecting nearby hydrogen-bond acceptors.
343///
344/// # Arguments
345///
346/// * `residue` - Histidine residue being evaluated.
347/// * `grid` - Grid of acceptor atoms (N/O/F) in the structure.
348/// * `indices` - (chain_idx, res_idx) of the current residue.
349///
350/// # Returns
351///
352/// The histidine label (`"HID"` or `"HIE"`) that maximizes compatibility with neighbors.
353fn optimize_his_network(
354    residue: &Residue,
355    grid: Option<&Grid<(usize, usize)>>,
356    indices: Option<(usize, usize)>,
357) -> String {
358    let (grid, self_indices) = match (grid, indices) {
359        (Some(g), Some(i)) => (g, i),
360        _ => return "HIE".to_string(),
361    };
362
363    let score_hid = calculate_h_bond_score(residue, "ND1", "CG", "CE1", grid, self_indices);
364
365    let score_hie = calculate_h_bond_score(residue, "NE2", "CD2", "CE1", grid, self_indices);
366
367    if score_hid > score_hie {
368        "HID".to_string()
369    } else {
370        "HIE".to_string()
371    }
372}
373
374/// Calculates a geometric score for a potential hydrogen bond network.
375///
376/// Computes the hypothetical hydrogen position for an sp2 nitrogen and sums the
377/// scores of valid H-bonds formed with nearby acceptors. A valid H-bond must
378/// satisfy both distance (< 2.7Å H...A) and angle (> 90° N-H...A) constraints.
379///
380/// # Arguments
381///
382/// * `residue` - The residue containing the donor nitrogen.
383/// * `n_name` - Name of the nitrogen atom (donor).
384/// * `c1_name` - Name of the first neighbor carbon.
385/// * `c2_name` - Name of the second neighbor carbon.
386/// * `grid` - Spatial index of potential acceptors.
387/// * `self_idx` - Chain and residue index of the current residue to exclude self-interactions.
388///
389/// # Returns
390///
391/// A positive floating-point score, where higher indicates better H-bonding.
392fn calculate_h_bond_score(
393    residue: &Residue,
394    n_name: &str,
395    c1_name: &str,
396    c2_name: &str,
397    grid: &Grid<(usize, usize)>,
398    self_idx: (usize, usize),
399) -> f64 {
400    let n = match residue.atom(n_name) {
401        Some(a) => a,
402        None => return 0.0,
403    };
404    let c1 = match residue.atom(c1_name) {
405        Some(a) => a,
406        None => return 0.0,
407    };
408    let c2 = match residue.atom(c2_name) {
409        Some(a) => a,
410        None => return 0.0,
411    };
412
413    let v1 = (c1.pos - n.pos).normalize();
414    let v2 = (c2.pos - n.pos).normalize();
415    let bisector = (v1 + v2).normalize();
416    let h_dir = -bisector;
417    let h_pos = n.pos + h_dir;
418
419    let mut score = 0.0;
420
421    for (a_pos, &idx) in grid.neighbors(&n.pos, 3.5).exact() {
422        if idx == self_idx {
423            continue;
424        }
425
426        let h_a_vec = a_pos - h_pos;
427        let dist_sq = h_a_vec.norm_squared();
428
429        if dist_sq > 2.7 * 2.7 {
430            continue;
431        }
432
433        let h_a_dir = h_a_vec.normalize();
434        let cos_theta = h_dir.dot(&h_a_dir);
435
436        if cos_theta > 0.0 {
437            score += (1.0 / dist_sq) * (cos_theta * cos_theta);
438        }
439    }
440
441    score
442}
443
444/// Rebuilds hydrogens for a single residue using template geometry and terminal rules.
445///
446/// # Arguments
447///
448/// * `residue` - Residue to augment with hydrogens.
449/// * `config` - Hydrogenation configuration influencing terminal protonation.
450///
451/// # Returns
452///
453/// `Ok(())` when all hydrogens were added or already present.
454///
455/// # Errors
456///
457/// Returns [`Error::MissingInternalTemplate`] or [`Error::IncompleteResidueForHydro`] when
458/// required template data or anchor atoms are missing.
459fn construct_hydrogens_for_residue(
460    residue: &mut Residue,
461    config: &HydroConfig,
462) -> Result<(), Error> {
463    let template_name = residue.name.clone();
464
465    let template_view =
466        db::get_template(&template_name).ok_or_else(|| Error::MissingInternalTemplate {
467            res_name: template_name.to_string(),
468        })?;
469
470    let existing_atoms: HashSet<String> =
471        residue.atoms().iter().map(|a| a.name.to_string()).collect();
472
473    let rotation_override = if residue.standard_name == Some(StandardResidue::HOH) {
474        let mut rng = rand::rng();
475        Some(
476            Rotation3::from_axis_angle(
477                &Vector3::y_axis(),
478                rng.random_range(0.0..std::f64::consts::TAU),
479            ) * Rotation3::from_axis_angle(
480                &Vector3::x_axis(),
481                rng.random_range(0.0..std::f64::consts::TAU),
482            ),
483        )
484    } else {
485        None
486    };
487
488    for (h_name, h_tmpl_pos, anchors_iter) in template_view.hydrogens() {
489        if existing_atoms.contains(h_name) {
490            continue;
491        }
492
493        let anchors: Vec<&str> = anchors_iter.collect();
494        if let Ok(pos) = reconstruct_geometry(residue, h_tmpl_pos, &anchors, rotation_override) {
495            residue.add_atom(Atom::new(h_name, Element::H, pos));
496        } else {
497            return Err(Error::incomplete_for_hydro(
498                &*residue.name,
499                residue.id,
500                anchors.first().copied().unwrap_or("?"),
501            ));
502        }
503    }
504
505    match residue.position {
506        ResiduePosition::NTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
507            construct_n_term_hydrogens(residue, n_term_should_be_protonated(config))?;
508        }
509        ResiduePosition::CTerminal if residue.standard_name.is_some_and(|s| s.is_protein()) => {
510            construct_c_term_hydrogen(residue, c_term_should_be_protonated(config))?;
511        }
512        ResiduePosition::ThreePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
513            construct_3_prime_hydrogen(residue)?;
514        }
515        ResiduePosition::FivePrime if residue.standard_name.is_some_and(|s| s.is_nucleic()) => {
516            if residue.has_atom("P") {
517                construct_5_prime_phosphate_hydrogens(residue, config)?;
518            } else if residue.has_atom("O5'") {
519                construct_5_prime_hydrogen(residue)?;
520            }
521        }
522        _ => {}
523    }
524
525    Ok(())
526}
527
528/// Evaluates whether an N-terminus should remain protonated under the configured pH.
529///
530/// # Arguments
531///
532/// * `config` - Hydrogenation configuration that may provide a target pH.
533///
534/// # Returns
535///
536/// `true` when the pH is below the configured threshold or unspecified.
537fn n_term_should_be_protonated(config: &HydroConfig) -> bool {
538    config.target_ph.map(|ph| ph < N_TERM_PKA).unwrap_or(true)
539}
540
541/// Evaluates whether a C-terminus should remain protonated under the configured pH.
542///
543/// # Arguments
544///
545/// * `config` - Hydrogenation configuration potentially specifying pH.
546///
547/// # Returns
548///
549/// `true` when the pH is below the acidic cutoff; otherwise `false`.
550fn c_term_should_be_protonated(config: &HydroConfig) -> bool {
551    config.target_ph.map(|ph| ph < C_TERM_PKA).unwrap_or(false)
552}
553
554/// Aligns template coordinates to residue anchors to predict a hydrogen position.
555///
556/// # Arguments
557///
558/// * `residue` - Residue containing measured anchor atoms.
559/// * `target_tmpl_pos` - Target hydrogen position from the template.
560/// * `anchor_names` - Atom names used to determine the rigid-body transform.
561/// * `rotation_override` - Optional rotation to use if the system is under-constrained (e.g. water).
562///
563/// # Returns
564///
565/// `Ok(point)` containing the placed coordinate or `Err(())` if anchors are missing.
566fn reconstruct_geometry(
567    residue: &Residue,
568    target_tmpl_pos: Point,
569    anchor_names: &[&str],
570    rotation_override: Option<Rotation3<f64>>,
571) -> Result<Point, ()> {
572    let template_view = db::get_template(&residue.name).ok_or(())?;
573
574    let mut residue_pts = Vec::new();
575    let mut template_pts = Vec::new();
576
577    for name in anchor_names {
578        let r_atom = residue.atom(name).ok_or(())?;
579        residue_pts.push(r_atom.pos);
580
581        let t_pos = template_view
582            .heavy_atoms()
583            .find(|(n, _, _)| n == name)
584            .map(|(_, _, p)| p)
585            .ok_or(())?;
586        template_pts.push(t_pos);
587    }
588
589    let (mut rot, mut trans) = calculate_transform(&residue_pts, &template_pts).ok_or(())?;
590
591    if let (Some(override_rot), 1) = (rotation_override, anchor_names.len()) {
592        rot = override_rot.into_inner();
593        trans = residue_pts[0].coords - rot * template_pts[0].coords;
594    }
595
596    Ok(rot * target_tmpl_pos + trans)
597}
598
599/// Rebuilds the N-terminal amine hydrogens using tetrahedral geometry.
600///
601/// # Arguments
602///
603/// * `residue` - Residue whose terminal hydrogens are being reconstructed.
604/// * `protonated` - Whether to place three hydrogens (`true`) or two (`false`).
605///
606/// # Returns
607///
608/// `Ok(())` when hydrogens are successfully placed.
609///
610/// # Errors
611///
612/// Returns [`Error::IncompleteResidueForHydro`] if the N or CA atoms are missing.
613fn construct_n_term_hydrogens(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
614    residue.remove_atom("H");
615    residue.remove_atom("H1");
616    residue.remove_atom("H2");
617    residue.remove_atom("H3");
618
619    let n_pos = residue
620        .atom("N")
621        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "N"))?
622        .pos;
623    let ca_pos = residue
624        .atom("CA")
625        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "CA"))?
626        .pos;
627
628    let v_ca_n = (n_pos - ca_pos).normalize();
629    let bond_len = 1.0;
630    let angle = 109.5_f64.to_radians();
631    let sin_a = angle.sin();
632    let cos_a = angle.cos();
633
634    let up = if v_ca_n.x.abs() < 0.9 {
635        Vector3::x()
636    } else {
637        Vector3::y()
638    };
639    let v_perp = v_ca_n.cross(&up).normalize();
640    let base_dir = v_ca_n.scale(cos_a) + v_perp.scale(sin_a);
641
642    let phases = [0.0_f64, 120.0, 240.0];
643    let mut candidates: Vec<Vector3<f64>> = phases
644        .iter()
645        .map(|deg| {
646            let rot_axis = Rotation3::from_axis_angle(
647                &nalgebra::Unit::new_normalize(v_ca_n),
648                deg.to_radians(),
649            );
650            rot_axis * base_dir
651        })
652        .collect();
653
654    candidates.sort_by(|a, b| {
655        a.dot(&v_ca_n)
656            .partial_cmp(&b.dot(&v_ca_n))
657            .unwrap_or(std::cmp::Ordering::Equal)
658    });
659
660    let target_count = if protonated { 3 } else { 2 };
661    let names = ["H1", "H2", "H3"];
662    for (idx, dir) in candidates.into_iter().take(target_count).enumerate() {
663        residue.add_atom(Atom::new(
664            names[idx],
665            Element::H,
666            n_pos + dir.scale(bond_len),
667        ));
668    }
669
670    Ok(())
671}
672
673/// Rebuilds the carboxylate proton at the C-terminus when protonated.
674///
675/// # Arguments
676///
677/// * `residue` - Residue to modify.
678/// * `protonated` - Whether the terminus should include the `HOXT` hydrogen.
679///
680/// # Returns
681///
682/// `Ok(())` after either removing or adding hydrogens as needed.
683///
684/// # Errors
685///
686/// Returns [`Error::IncompleteResidueForHydro`] if the `C` or `OXT` atoms are missing.
687fn construct_c_term_hydrogen(residue: &mut Residue, protonated: bool) -> Result<(), Error> {
688    if !protonated {
689        residue.remove_atom("HOXT");
690        residue.remove_atom("HXT");
691        return Ok(());
692    }
693
694    residue.remove_atom("HXT");
695    if residue.has_atom("HOXT") {
696        return Ok(());
697    }
698
699    let c_pos = residue
700        .atom("C")
701        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C"))?
702        .pos;
703    let oxt_pos = residue
704        .atom("OXT")
705        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "OXT"))?
706        .pos;
707
708    let direction = oxt_pos - c_pos;
709    if direction.norm_squared() < 1e-6 {
710        return Err(Error::incomplete_for_hydro(
711            &*residue.name,
712            residue.id,
713            "OXT",
714        ));
715    }
716    let dir = direction.normalize();
717    let h_pos = oxt_pos + dir.scale(0.97);
718
719    residue.add_atom(Atom::new("HOXT", Element::H, h_pos));
720    Ok(())
721}
722
723/// Adds the 3'-terminal hydroxyl hydrogen to nucleic acid residues when absent.
724///
725/// # Arguments
726///
727/// * `residue` - Residue representing a nucleic acid terminal unit.
728///
729/// # Returns
730///
731/// `Ok(())` when the hydrogen is added or already present.
732///
733/// # Errors
734///
735/// Returns [`Error::IncompleteResidueForHydro`] if required sugar atoms are missing.
736fn construct_3_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
737    if residue.has_atom("HO3'") {
738        return Ok(());
739    }
740
741    let o3 = residue
742        .atom("O3'")
743        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "O3'"))?
744        .pos;
745    let c3 = residue
746        .atom("C3'")
747        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C3'"))?
748        .pos;
749    let c4 = residue
750        .atom("C4'")
751        .or_else(|| residue.atom("C2'"))
752        .map(|a| a.pos)
753        .unwrap_or_else(|| c3 + Vector3::x());
754
755    let v_c3_o3 = (o3 - c3).normalize();
756
757    let v_c4_c3 = (c3 - c4).normalize();
758    let normal = v_c3_o3.cross(&v_c4_c3).normalize();
759
760    let h_dir = (v_c3_o3 + normal).normalize();
761    let h_pos = o3 + h_dir.scale(0.96);
762
763    residue.add_atom(Atom::new("HO3'", Element::H, h_pos));
764    Ok(())
765}
766
767/// Adds the 5'-terminal hydroxyl hydrogen for nucleic acid residues lacking phosphates.
768///
769/// # Arguments
770///
771/// * `residue` - Residue whose 5' terminus is being hydrated.
772///
773/// # Returns
774///
775/// `Ok(())` when the hydrogen is added or already exists.
776///
777/// # Errors
778///
779/// Returns [`Error::IncompleteResidueForHydro`] if sugar anchor atoms are missing.
780fn construct_5_prime_hydrogen(residue: &mut Residue) -> Result<(), Error> {
781    if residue.has_atom("HO5'") {
782        return Ok(());
783    }
784
785    let o5 = residue
786        .atom("O5'")
787        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "O5'"))?
788        .pos;
789    let c5 = residue
790        .atom("C5'")
791        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "C5'"))?
792        .pos;
793
794    let v_c5_o5 = (o5 - c5).normalize();
795    let aux = if v_c5_o5.z.abs() < 0.9 {
796        Vector3::z()
797    } else {
798        Vector3::x()
799    };
800    let perp = v_c5_o5.cross(&aux).normalize();
801
802    let h_dir = (v_c5_o5 + perp).normalize();
803    let h_pos = o5 + h_dir.scale(0.96);
804
805    residue.add_atom(Atom::new("HO5'", Element::H, h_pos));
806    Ok(())
807}
808
809/// Adds hydrogens to 5'-terminal phosphate groups based on pH.
810///
811/// At physiological pH (≥6.5), the terminal phosphate carries two negative charges and
812/// requires no protons. Below this threshold, one proton is added to OP3. The function
813/// respects existing hydrogens when `remove_existing_h` is disabled.
814///
815/// # Arguments
816///
817/// * `residue` - Nucleic acid residue with a 5'-terminal phosphate.
818/// * `config` - Hydrogenation configuration containing pH settings.
819///
820/// # Returns
821///
822/// `Ok(())` when hydrogens are appropriately placed or removed.
823///
824/// # Errors
825///
826/// Returns [`Error::IncompleteResidueForHydro`] if phosphate atoms are missing.
827fn construct_5_prime_phosphate_hydrogens(
828    residue: &mut Residue,
829    config: &HydroConfig,
830) -> Result<(), Error> {
831    let ph = config.target_ph.unwrap_or(7.4);
832
833    if ph >= PHOSPHATE_PKA2 {
834        residue.remove_atom("HOP3");
835        residue.remove_atom("HOP2");
836        return Ok(());
837    }
838
839    if residue.has_atom("HOP3") {
840        return Ok(());
841    }
842
843    let op3 = residue
844        .atom("OP3")
845        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "OP3"))?
846        .pos;
847    let p = residue
848        .atom("P")
849        .ok_or_else(|| Error::incomplete_for_hydro(&*residue.name, residue.id, "P"))?
850        .pos;
851
852    let direction = (op3 - p).normalize();
853    let h_pos = op3 + direction * 0.96;
854
855    residue.add_atom(Atom::new("HOP3", Element::H, h_pos));
856    Ok(())
857}
858
859/// Computes the optimal rigid transform mapping template anchor points to residue atoms.
860///
861/// Uses Kabsch alignment with safeguards for one- and two-point configurations.
862///
863/// # Arguments
864///
865/// * `r_pts` - Coordinates from the residue.
866/// * `t_pts` - Corresponding coordinates from the template.
867///
868/// # Returns
869///
870/// `Some((rotation, translation))` when alignment succeeds; otherwise `None`.
871fn calculate_transform(r_pts: &[Point], t_pts: &[Point]) -> Option<(Matrix3<f64>, Vector3<f64>)> {
872    let n = r_pts.len();
873    if n != t_pts.len() || n == 0 {
874        return None;
875    }
876
877    let r_center = r_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
878    let t_center = t_pts.iter().map(|p| p.coords).sum::<Vector3<f64>>() / n as f64;
879
880    if n == 1 {
881        return Some((Matrix3::identity(), r_center - t_center));
882    }
883
884    if n == 2 {
885        let v_r = r_pts[1] - r_pts[0];
886        let v_t = t_pts[1] - t_pts[0];
887        let rot = Rotation3::rotation_between(&v_t, &v_r).unwrap_or_else(Rotation3::identity);
888        let trans = r_center - rot * t_center;
889        return Some((rot.into_inner(), trans));
890    }
891
892    let mut cov = Matrix3::zeros();
893    for (p_r, p_t) in r_pts.iter().zip(t_pts.iter()) {
894        let v_r = p_r.coords - r_center;
895        let v_t = p_t.coords - t_center;
896        cov += v_r * v_t.transpose();
897    }
898
899    let svd = cov.svd(true, true);
900    let u = svd.u?;
901    let v_t = svd.v_t?;
902
903    let mut rot = u * v_t;
904    if rot.determinant() < 0.0 {
905        let mut corr = Matrix3::identity();
906        corr[(2, 2)] = -1.0;
907        rot = u * corr * v_t;
908    }
909
910    let trans = r_center - rot * t_center;
911    Some((rot, trans))
912}
913
914#[cfg(test)]
915mod tests {
916    use super::*;
917    use crate::model::{
918        atom::Atom,
919        chain::Chain,
920        residue::Residue,
921        types::{Element, Point, ResidueCategory, ResiduePosition, StandardResidue},
922    };
923
924    fn residue_from_template(name: &str, std: StandardResidue, id: i32) -> Residue {
925        let template = db::get_template(name).unwrap_or_else(|| panic!("missing template {name}"));
926        let mut residue = Residue::new(id, None, name, Some(std), ResidueCategory::Standard);
927        residue.position = ResiduePosition::Internal;
928        for (atom_name, element, pos) in template.heavy_atoms() {
929            residue.add_atom(Atom::new(atom_name, element, pos));
930        }
931        residue
932    }
933
934    fn structure_with_residue(residue: Residue) -> Structure {
935        let mut chain = Chain::new("A");
936        chain.add_residue(residue);
937        let mut structure = Structure::new();
938        structure.add_chain(chain);
939        structure
940    }
941
942    fn structure_with_residues(residues: Vec<Residue>) -> Structure {
943        let mut chain = Chain::new("A");
944        for residue in residues {
945            chain.add_residue(residue);
946        }
947        let mut structure = Structure::new();
948        structure.add_chain(chain);
949        structure
950    }
951
952    fn n_terminal_residue(id: i32) -> Residue {
953        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
954        residue.position = ResiduePosition::NTerminal;
955        residue
956    }
957
958    fn c_terminal_residue(id: i32) -> Residue {
959        let mut residue = residue_from_template("ALA", StandardResidue::ALA, id);
960        residue.position = ResiduePosition::CTerminal;
961        let c_pos = residue.atom("C").expect("C atom").pos;
962        let o_pos = residue.atom("O").expect("O atom").pos;
963        let offset = c_pos - o_pos;
964        let oxt_pos = c_pos + offset;
965        residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
966        residue
967    }
968
969    fn five_prime_residue_with_phosphate(id: i32) -> Residue {
970        let template = db::get_template("DA").unwrap();
971        let mut residue = Residue::new(
972            id,
973            None,
974            "DA",
975            Some(StandardResidue::DA),
976            ResidueCategory::Standard,
977        );
978        residue.position = ResiduePosition::FivePrime;
979        for (atom_name, element, pos) in template.heavy_atoms() {
980            residue.add_atom(Atom::new(atom_name, element, pos));
981        }
982        let p_pos = residue.atom("P").unwrap().pos;
983        let op1_pos = residue.atom("OP1").unwrap().pos;
984        let op2_pos = residue.atom("OP2").unwrap().pos;
985        let o5_pos = residue.atom("O5'").unwrap().pos;
986        let centroid = (op1_pos.coords + op2_pos.coords + o5_pos.coords) / 3.0;
987        let direction = (p_pos.coords - centroid).normalize();
988        let op3_pos = p_pos + direction * 1.48;
989        residue.add_atom(Atom::new("OP3", Element::O, op3_pos));
990        residue
991    }
992
993    fn five_prime_residue_without_phosphate(id: i32) -> Residue {
994        let template = db::get_template("DA").unwrap();
995        let mut residue = Residue::new(
996            id,
997            None,
998            "DA",
999            Some(StandardResidue::DA),
1000            ResidueCategory::Standard,
1001        );
1002        residue.position = ResiduePosition::FivePrime;
1003        for (atom_name, element, pos) in template.heavy_atoms() {
1004            if !matches!(atom_name, "P" | "OP1" | "OP2") {
1005                residue.add_atom(Atom::new(atom_name, element, pos));
1006            }
1007        }
1008        residue
1009    }
1010
1011    fn three_prime_residue(id: i32) -> Residue {
1012        let template = db::get_template("DA").unwrap();
1013        let mut residue = Residue::new(
1014            id,
1015            None,
1016            "DA",
1017            Some(StandardResidue::DA),
1018            ResidueCategory::Standard,
1019        );
1020        residue.position = ResiduePosition::ThreePrime;
1021        for (atom_name, element, pos) in template.heavy_atoms() {
1022            residue.add_atom(Atom::new(atom_name, element, pos));
1023        }
1024        residue
1025    }
1026
1027    #[test]
1028    fn titratable_templates_exist_in_database() {
1029        let expected = [
1030            "ASP", "ASH", "GLU", "GLH", "LYS", "LYN", "ARG", "ARN", "CYS", "CYM", "TYR", "TYM",
1031            "HID", "HIE", "HIP",
1032        ];
1033
1034        for name in expected {
1035            assert!(
1036                db::get_template(name).is_some(),
1037                "template {name} should exist"
1038            );
1039        }
1040    }
1041
1042    #[test]
1043    fn determine_protonation_state_tracks_pka_thresholds() {
1044        let structure =
1045            structure_with_residue(residue_from_template("ASP", StandardResidue::ASP, 1));
1046        let mut config = HydroConfig {
1047            target_ph: Some(2.5),
1048            ..HydroConfig::default()
1049        };
1050        assert_eq!(
1051            determine_protonation_state(
1052                structure
1053                    .iter_chains()
1054                    .next()
1055                    .unwrap()
1056                    .iter_residues()
1057                    .next()
1058                    .unwrap(),
1059                &config,
1060                None,
1061                None
1062            ),
1063            Some("ASH".to_string())
1064        );
1065
1066        config.target_ph = Some(5.0);
1067        assert_eq!(
1068            determine_protonation_state(
1069                structure
1070                    .iter_chains()
1071                    .next()
1072                    .unwrap()
1073                    .iter_residues()
1074                    .next()
1075                    .unwrap(),
1076                &config,
1077                None,
1078                None
1079            ),
1080            Some("ASP".to_string())
1081        );
1082
1083        let structure =
1084            structure_with_residue(residue_from_template("LYS", StandardResidue::LYS, 2));
1085        config.target_ph = Some(11.0);
1086        assert_eq!(
1087            determine_protonation_state(
1088                structure
1089                    .iter_chains()
1090                    .next()
1091                    .unwrap()
1092                    .iter_residues()
1093                    .next()
1094                    .unwrap(),
1095                &config,
1096                None,
1097                None
1098            ),
1099            Some("LYN".to_string())
1100        );
1101
1102        config.target_ph = Some(7.0);
1103        assert_eq!(
1104            determine_protonation_state(
1105                structure
1106                    .iter_chains()
1107                    .next()
1108                    .unwrap()
1109                    .iter_residues()
1110                    .next()
1111                    .unwrap(),
1112                &config,
1113                None,
1114                None
1115            ),
1116            Some("LYS".to_string())
1117        );
1118    }
1119
1120    #[test]
1121    fn determine_protonation_state_respects_his_strategy() {
1122        let mut residue = residue_from_template("HID", StandardResidue::HIS, 3);
1123        residue.name = "HIS".into();
1124        let structure = structure_with_residue(residue);
1125
1126        let config = HydroConfig {
1127            target_ph: Some(7.0),
1128            his_strategy: HisStrategy::DirectHIE,
1129            ..HydroConfig::default()
1130        };
1131
1132        assert_eq!(
1133            determine_protonation_state(
1134                structure
1135                    .iter_chains()
1136                    .next()
1137                    .unwrap()
1138                    .iter_residues()
1139                    .next()
1140                    .unwrap(),
1141                &config,
1142                None,
1143                None
1144            ),
1145            Some("HIE".to_string())
1146        );
1147
1148        let mut acid_config = HydroConfig::default();
1149        acid_config.target_ph = Some(5.5);
1150        assert_eq!(
1151            determine_protonation_state(
1152                structure
1153                    .iter_chains()
1154                    .next()
1155                    .unwrap()
1156                    .iter_residues()
1157                    .next()
1158                    .unwrap(),
1159                &acid_config,
1160                None,
1161                None
1162            ),
1163            Some("HIP".to_string())
1164        );
1165    }
1166
1167    #[test]
1168    fn n_terminal_defaults_to_protonated_without_ph() {
1169        let residue = n_terminal_residue(40);
1170        let mut structure = structure_with_residue(residue);
1171
1172        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1173
1174        let residue = structure.find_residue("A", 40, None).unwrap();
1175        assert!(residue.has_atom("H1"));
1176        assert!(residue.has_atom("H2"));
1177        assert!(residue.has_atom("H3"));
1178    }
1179
1180    #[test]
1181    fn n_terminal_deprotonates_above_pka() {
1182        let residue = n_terminal_residue(41);
1183        let mut structure = structure_with_residue(residue);
1184        let mut config = HydroConfig::default();
1185        config.target_ph = Some(9.0);
1186
1187        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1188
1189        let residue = structure.find_residue("A", 41, None).unwrap();
1190        assert!(residue.has_atom("H1"));
1191        assert!(residue.has_atom("H2"));
1192        assert!(!residue.has_atom("H3"));
1193    }
1194
1195    #[test]
1196    fn c_terminal_protonates_under_acidic_ph() {
1197        let residue = c_terminal_residue(50);
1198        let mut structure = structure_with_residue(residue);
1199        let mut config = HydroConfig::default();
1200        config.target_ph = Some(2.5);
1201
1202        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1203
1204        let residue = structure.find_residue("A", 50, None).unwrap();
1205        assert!(residue.has_atom("HOXT"));
1206    }
1207
1208    #[test]
1209    fn c_terminal_remains_deprotonated_at_physiological_ph() {
1210        let residue = c_terminal_residue(51);
1211        let mut structure = structure_with_residue(residue);
1212
1213        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1214
1215        let residue = structure.find_residue("A", 51, None).unwrap();
1216        assert!(!residue.has_atom("HOXT"));
1217    }
1218
1219    #[test]
1220    fn determine_protonation_state_skips_already_marked_cyx() {
1221        let mut residue = residue_from_template("CYS", StandardResidue::CYS, 25);
1222        residue.name = "CYX".into();
1223        let structure = structure_with_residue(residue);
1224        let mut config = HydroConfig::default();
1225        config.target_ph = Some(9.0);
1226
1227        assert_eq!(
1228            determine_protonation_state(
1229                structure
1230                    .iter_chains()
1231                    .next()
1232                    .unwrap()
1233                    .iter_residues()
1234                    .next()
1235                    .unwrap(),
1236                &config,
1237                None,
1238                None
1239            ),
1240            None
1241        );
1242    }
1243
1244    #[test]
1245    fn add_hydrogens_populates_internal_lysine_side_chain() {
1246        let mut residue = residue_from_template("LYS", StandardResidue::LYS, 10);
1247        residue.add_atom(Atom::new("FAKE", Element::H, Point::origin()));
1248        let mut structure = structure_with_residue(residue);
1249
1250        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1251
1252        let residue = structure.find_residue("A", 10, None).unwrap();
1253        assert!(residue.has_atom("HZ1"));
1254        assert!(residue.has_atom("HZ2"));
1255        assert!(residue.has_atom("HZ3"));
1256        assert!(!residue.has_atom("FAKE"));
1257    }
1258
1259    #[test]
1260    fn add_hydrogens_relabels_asp_under_acidic_ph() {
1261        let residue = residue_from_template("ASP", StandardResidue::ASP, 15);
1262        let mut structure = structure_with_residue(residue);
1263        let mut config = HydroConfig::default();
1264        config.target_ph = Some(2.0);
1265
1266        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1267
1268        let residue = structure.find_residue("A", 15, None).unwrap();
1269        assert_eq!(residue.name, "ASH");
1270        assert!(residue.has_atom("HD2"));
1271    }
1272
1273    #[test]
1274    fn construct_hydrogens_errors_when_anchor_missing() {
1275        let template = db::get_template("ALA").expect("template ALA");
1276        let mut residue = Residue::new(
1277            20,
1278            None,
1279            "ALA",
1280            Some(StandardResidue::ALA),
1281            ResidueCategory::Standard,
1282        );
1283        residue.position = ResiduePosition::Internal;
1284
1285        let (name, element, pos) = template.heavy_atoms().next().unwrap();
1286        residue.add_atom(Atom::new(name, element, pos));
1287
1288        let err = construct_hydrogens_for_residue(&mut residue, &HydroConfig::default())
1289            .expect_err("should fail");
1290        assert!(matches!(err, Error::IncompleteResidueForHydro { .. }));
1291    }
1292
1293    #[test]
1294    fn close_cysteines_are_relabelled_to_cyx_and_skip_hydrogens() {
1295        let cys1 = residue_from_template("CYS", StandardResidue::CYS, 30);
1296        let mut cys2 = residue_from_template("CYS", StandardResidue::CYS, 31);
1297
1298        let sg1 = cys1.atom("SG").expect("SG in cys1").pos;
1299        let sg2 = cys2.atom("SG").expect("SG in cys2").pos;
1300        let desired = sg1 + Vector3::new(0.5, 0.0, 0.0);
1301        let offset = desired - sg2;
1302        for atom in cys2.iter_atoms_mut() {
1303            atom.translate_by(&offset);
1304        }
1305
1306        let mut structure = structure_with_residues(vec![cys1, cys2]);
1307        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation");
1308
1309        let res1 = structure.find_residue("A", 30, None).unwrap();
1310        let res2 = structure.find_residue("A", 31, None).unwrap();
1311        assert_eq!(res1.name, "CYX");
1312        assert_eq!(res2.name, "CYX");
1313        assert!(!res1.has_atom("HG"));
1314        assert!(!res2.has_atom("HG"));
1315    }
1316
1317    #[test]
1318    fn five_prime_phosphate_deprotonated_at_physiological_ph() {
1319        let residue = five_prime_residue_with_phosphate(60);
1320        let mut structure = structure_with_residue(residue);
1321
1322        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1323
1324        let residue = structure.find_residue("A", 60, None).unwrap();
1325        assert!(residue.has_atom("OP3"), "OP3 should remain");
1326        assert!(
1327            !residue.has_atom("HOP3"),
1328            "HOP3 should not exist at neutral pH"
1329        );
1330        assert!(
1331            !residue.has_atom("HOP2"),
1332            "HOP2 should not exist at neutral pH"
1333        );
1334    }
1335
1336    #[test]
1337    fn five_prime_phosphate_protonated_below_pka() {
1338        let residue = five_prime_residue_with_phosphate(61);
1339        let mut structure = structure_with_residue(residue);
1340        let mut config = HydroConfig::default();
1341        config.target_ph = Some(5.5);
1342
1343        add_hydrogens(&mut structure, &config).expect("hydrogenation succeeds");
1344
1345        let residue = structure.find_residue("A", 61, None).unwrap();
1346        assert!(residue.has_atom("OP3"), "OP3 should remain");
1347        assert!(residue.has_atom("HOP3"), "HOP3 should be added below pKa");
1348    }
1349
1350    #[test]
1351    fn five_prime_without_phosphate_gets_ho5() {
1352        let residue = five_prime_residue_without_phosphate(62);
1353        let mut structure = structure_with_residue(residue);
1354
1355        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1356
1357        let residue = structure.find_residue("A", 62, None).unwrap();
1358        assert!(
1359            residue.has_atom("HO5'"),
1360            "HO5' should be added for 5'-OH terminus"
1361        );
1362        assert!(!residue.has_atom("P"), "phosphorus should not exist");
1363    }
1364
1365    #[test]
1366    fn three_prime_nucleic_gets_ho3() {
1367        let residue = three_prime_residue(70);
1368        let mut structure = structure_with_residue(residue);
1369
1370        add_hydrogens(&mut structure, &HydroConfig::default()).expect("hydrogenation succeeds");
1371
1372        let residue = structure.find_residue("A", 70, None).unwrap();
1373        assert!(
1374            residue.has_atom("HO3'"),
1375            "HO3' should be added for 3' terminal"
1376        );
1377    }
1378}