Skip to main content

bio_forge/ops/
topology.rs

1//! Bond topology builder used to reconstruct intra- and inter-residue connectivity.
2//!
3//! The routines in this module consume a fully prepared [`Structure`] and emit a
4//! [`Topology`] that encodes all covalent bonds. Internal templates are used for
5//! standard residues, while callers can provide additional hetero templates.
6//! Beyond template-driven intra-residue bonds, the builder also infers peptide,
7//! nucleic-backbone, terminal, and disulfide bonds using geometric thresholds.
8
9use crate::db;
10use crate::model::{
11    grid::Grid,
12    structure::Structure,
13    template::Template,
14    topology::{Bond, Topology},
15    types::{BondOrder, ResidueCategory, ResiduePosition},
16};
17use crate::ops::error::Error;
18use crate::utils::parallel::*;
19use std::collections::HashMap;
20
21/// Builder responsible for creating [`Topology`] objects from a [`Structure`].
22///
23/// The builder can augment the internal template database with additional
24/// hetero templates and tweak geometric cutoffs for disulfide, peptide, and
25/// nucleic bonds before running [`TopologyBuilder::build`].
26pub struct TopologyBuilder {
27    hetero_templates: HashMap<String, Template>,
28    disulfide_bond_cutoff: f64,
29    peptide_bond_cutoff: f64,
30    nucleic_bond_cutoff: f64,
31}
32
33impl Default for TopologyBuilder {
34    fn default() -> Self {
35        Self {
36            hetero_templates: HashMap::new(),
37            disulfide_bond_cutoff: 2.2,
38            peptide_bond_cutoff: 1.5,
39            nucleic_bond_cutoff: 1.8,
40        }
41    }
42}
43
44impl TopologyBuilder {
45    /// Creates a builder that uses default cutoffs and no extra templates.
46    ///
47    /// Default cutoffs were chosen to capture typical covalent geometry in
48    /// Ångström. They can be overridden through the builder-style setters.
49    pub fn new() -> Self {
50        Self::default()
51    }
52
53    /// Registers a new template that will be used for hetero residues.
54    ///
55    /// # Arguments
56    ///
57    /// * `template` - Template describing atoms and bonds for a residue that
58    ///   is classified as `ResidueCategory::Hetero`.
59    pub fn add_hetero_template(mut self, template: Template) -> Self {
60        self.hetero_templates
61            .insert(template.name.clone(), template);
62        self
63    }
64
65    /// Configures the maximum SG···SG distance allowed for disulfide bonds.
66    ///
67    /// # Arguments
68    ///
69    /// * `cutoff` - Maximum distance in Ångström for connecting cysteine SG
70    ///   atoms.
71    pub fn disulfide_cutoff(mut self, cutoff: f64) -> Self {
72        self.disulfide_bond_cutoff = cutoff;
73        self
74    }
75
76    /// Builds a [`Topology`] for the provided structure.
77    ///
78    /// All intra-residue bonds are taken from templates and terminal rules,
79    /// while inter-residue bonds rely on distance checks using the configured
80    /// cutoffs.
81    ///
82    /// # Arguments
83    ///
84    /// * `structure` - Structure for which to build the bond topology.
85    ///
86    /// # Returns
87    ///
88    /// A `Result` containing the built [`Topology`] or an [`Error`].
89    ///
90    /// # Errors
91    ///
92    /// Returns [`Error`] when a required template or atom is missing.
93    pub fn build(self, structure: Structure) -> Result<Topology, Error> {
94        let mut chain_offsets = Vec::with_capacity(structure.chain_count());
95        let mut current_offset = 0;
96        for chain in structure.iter_chains() {
97            chain_offsets.push(current_offset);
98            current_offset += chain.atom_count();
99        }
100
101        assert_eq!(chain_offsets.len(), structure.chain_count());
102
103        let hetero_templates = &self.hetero_templates;
104        let peptide_cutoff = self.peptide_bond_cutoff;
105        let nucleic_cutoff = self.nucleic_bond_cutoff;
106        let disulfide_cutoff = self.disulfide_bond_cutoff;
107
108        let (mut bonds, sulfurs) = structure
109            .par_chains()
110            .zip(chain_offsets)
111            .map(|(chain, chain_start_offset)| {
112                let mut local_bonds = Vec::new();
113                let mut local_sulfurs = Vec::new();
114                let mut residue_offset = chain_start_offset;
115
116                let residues: Vec<_> = chain.iter_residues().collect();
117
118                for (i, residue) in residues.iter().enumerate() {
119                    let atom_count = residue.atom_count();
120
121                    Self::build_intra_residue_for_residue(
122                        residue,
123                        residue_offset,
124                        hetero_templates,
125                        &mut local_bonds,
126                    )?;
127
128                    if i < residues.len() - 1 {
129                        let next_residue = residues[i + 1];
130                        let next_offset = residue_offset + atom_count;
131
132                        Self::build_backbone_bond(
133                            residue,
134                            residue_offset,
135                            next_residue,
136                            next_offset,
137                            peptide_cutoff,
138                            nucleic_cutoff,
139                            &mut local_bonds,
140                        );
141                    }
142
143                    match residue.name.as_str() {
144                        "CYX" | "CYM" => {
145                            if let Some(sg_idx) = residue.iter_atoms().position(|a| a.name == "SG")
146                            {
147                                let sg_pos = residue.atoms()[sg_idx].pos;
148                                local_sulfurs.push((sg_pos, residue_offset + sg_idx));
149                            }
150                        }
151                        _ => {}
152                    }
153
154                    residue_offset += atom_count;
155                }
156
157                Ok((local_bonds, local_sulfurs))
158            })
159            .try_reduce(
160                || (Vec::new(), Vec::new()),
161                |mut a, b| {
162                    a.0.extend(b.0);
163                    a.1.extend(b.1);
164                    Ok(a)
165                },
166            )?;
167
168        if !sulfurs.is_empty() {
169            let grid = Grid::new(
170                sulfurs.iter().map(|(p, i)| (*p, *i)),
171                disulfide_cutoff + 0.5,
172            );
173
174            let disulfide_bonds: Vec<Bond> = sulfurs
175                .par_iter()
176                .flat_map(|(pos, idx1)| {
177                    grid.neighbors(pos, disulfide_cutoff)
178                        .exact()
179                        .filter_map(|(_, idx2)| {
180                            if *idx1 < *idx2 {
181                                Some(Bond::new(*idx1, *idx2, BondOrder::Single))
182                            } else {
183                                None
184                            }
185                        })
186                        .collect::<Vec<_>>()
187                })
188                .collect();
189
190            bonds.extend(disulfide_bonds);
191        }
192
193        bonds.par_sort_unstable();
194        bonds.dedup();
195
196        Ok(Topology::new(structure, bonds))
197    }
198
199    /// Helper to generate intra-residue bonds for a single residue.
200    fn build_intra_residue_for_residue(
201        residue: &crate::model::residue::Residue,
202        offset: usize,
203        hetero_templates: &HashMap<String, Template>,
204        bonds: &mut Vec<Bond>,
205    ) -> Result<(), Error> {
206        if residue.category == ResidueCategory::Ion {
207            return Ok(());
208        }
209
210        if residue.category == ResidueCategory::Standard {
211            let tmpl_name = &residue.name;
212            let tmpl_view =
213                db::get_template(tmpl_name).ok_or_else(|| Error::MissingInternalTemplate {
214                    res_name: tmpl_name.to_string(),
215                })?;
216
217            for (a1_name, a2_name, order) in tmpl_view.bonds() {
218                Self::try_add_bond(residue, offset, a1_name, a2_name, order, bonds)?;
219            }
220
221            Self::handle_terminal_intra_bonds(residue, offset, bonds)?;
222        } else if residue.category == ResidueCategory::Hetero {
223            let tmpl = hetero_templates.get(residue.name.as_str()).ok_or_else(|| {
224                Error::MissingHeteroTemplate {
225                    res_name: residue.name.to_string(),
226                }
227            })?;
228
229            for (a1_name, a2_name, order) in tmpl.bonds() {
230                Self::try_add_bond(residue, offset, a1_name, a2_name, *order, bonds)?;
231            }
232        }
233
234        Ok(())
235    }
236
237    /// Helper to generate backbone bonds between two residues.
238    fn build_backbone_bond(
239        curr: &crate::model::residue::Residue,
240        curr_offset: usize,
241        next: &crate::model::residue::Residue,
242        next_offset: usize,
243        peptide_cutoff: f64,
244        nucleic_cutoff: f64,
245        bonds: &mut Vec<Bond>,
246    ) {
247        if curr.category != ResidueCategory::Standard || next.category != ResidueCategory::Standard
248        {
249            return;
250        }
251
252        if let (Some(std1), Some(std2)) = (curr.standard_name, next.standard_name) {
253            if std1.is_protein() && std2.is_protein() {
254                Self::connect_atoms_if_close(
255                    AtomLocator::new(curr, curr_offset, "C"),
256                    AtomLocator::new(next, next_offset, "N"),
257                    peptide_cutoff,
258                    BondOrder::Single,
259                    bonds,
260                );
261            } else if std1.is_nucleic() && std2.is_nucleic() {
262                Self::connect_atoms_if_close(
263                    AtomLocator::new(curr, curr_offset, "O3'"),
264                    AtomLocator::new(next, next_offset, "P"),
265                    nucleic_cutoff,
266                    BondOrder::Single,
267                    bonds,
268                );
269            }
270        }
271    }
272
273    /// Attempts to add a bond and reports informative errors when atoms are
274    /// missing.
275    fn try_add_bond(
276        residue: &crate::model::residue::Residue,
277        offset: usize,
278        name1: &str,
279        name2: &str,
280        order: BondOrder,
281        bonds: &mut Vec<Bond>,
282    ) -> Result<(), Error> {
283        let idx1 = residue.iter_atoms().position(|a| a.name == name1);
284        let idx2 = residue.iter_atoms().position(|a| a.name == name2);
285
286        match (idx1, idx2) {
287            (Some(i1), Some(i2)) => {
288                bonds.push(Bond::new(offset + i1, offset + i2, order));
289                Ok(())
290            }
291            (None, _) if Self::is_optional_terminal_atom(residue, name1) => Ok(()),
292            (_, None) if Self::is_optional_terminal_atom(residue, name2) => Ok(()),
293            (None, _) => Err(Error::topology_atom_missing(
294                &*residue.name,
295                residue.id,
296                name1,
297            )),
298            (_, None) => Err(Error::topology_atom_missing(
299                &*residue.name,
300                residue.id,
301                name2,
302            )),
303        }
304    }
305
306    /// Returns whether the provided atom is optional for the residue position.
307    fn is_optional_terminal_atom(
308        residue: &crate::model::residue::Residue,
309        atom_name: &str,
310    ) -> bool {
311        let is_protein = residue.standard_name.is_some_and(|std| std.is_protein());
312        let is_nucleic = residue.standard_name.is_some_and(|std| std.is_nucleic());
313
314        match residue.position {
315            ResiduePosition::NTerminal if is_protein => atom_name == "H",
316            ResiduePosition::CTerminal if is_protein => matches!(atom_name, "HXT" | "HOXT"),
317            ResiduePosition::FivePrime if is_nucleic => {
318                if residue.has_atom("P") {
319                    matches!(atom_name, "OP3" | "HOP3" | "HOP2")
320                } else {
321                    matches!(
322                        atom_name,
323                        "P" | "OP1" | "OP2" | "OP3" | "HOP3" | "HOP2" | "HO5'"
324                    )
325                }
326            }
327            ResiduePosition::ThreePrime if is_nucleic => atom_name == "HO3'",
328            _ => false,
329        }
330    }
331
332    /// Adds missing bonds for termini (protein and nucleic acid) that are not
333    /// explicitly listed in templates.
334    fn handle_terminal_intra_bonds(
335        residue: &crate::model::residue::Residue,
336        offset: usize,
337        bonds: &mut Vec<Bond>,
338    ) -> Result<(), Error> {
339        if residue.position == ResiduePosition::NTerminal
340            && residue.standard_name.is_some_and(|s| s.is_protein())
341        {
342            for h_name in ["H1", "H2", "H3"] {
343                if let (Some(h_idx), Some(n_idx)) = (
344                    residue.iter_atoms().position(|a| a.name == h_name),
345                    residue.iter_atoms().position(|a| a.name == "N"),
346                ) {
347                    bonds.push(Bond::new(offset + h_idx, offset + n_idx, BondOrder::Single));
348                }
349            }
350        }
351
352        if residue.position == ResiduePosition::CTerminal
353            && residue.standard_name.is_some_and(|s| s.is_protein())
354        {
355            let c_idx = residue.iter_atoms().position(|a| a.name == "C");
356            let oxt_idx = residue.iter_atoms().position(|a| a.name == "OXT");
357
358            if let (Some(c_idx), Some(oxt_idx)) = (c_idx, oxt_idx) {
359                bonds.push(Bond::new(
360                    offset + c_idx,
361                    offset + oxt_idx,
362                    BondOrder::Single,
363                ));
364
365                for h_name in ["HXT", "HOXT"] {
366                    if let Some(h_idx) = residue.iter_atoms().position(|a| a.name == h_name) {
367                        bonds.push(Bond::new(
368                            offset + oxt_idx,
369                            offset + h_idx,
370                            BondOrder::Single,
371                        ));
372                    }
373                }
374            }
375        }
376
377        if residue.position == ResiduePosition::FivePrime
378            && residue.standard_name.is_some_and(|s| s.is_nucleic())
379        {
380            if let (Some(p_idx), Some(op3_idx)) = (
381                residue.iter_atoms().position(|a| a.name == "P"),
382                residue.iter_atoms().position(|a| a.name == "OP3"),
383            ) {
384                bonds.push(Bond::new(
385                    offset + p_idx,
386                    offset + op3_idx,
387                    BondOrder::Single,
388                ));
389
390                if let Some(hop3_idx) = residue.iter_atoms().position(|a| a.name == "HOP3") {
391                    bonds.push(Bond::new(
392                        offset + op3_idx,
393                        offset + hop3_idx,
394                        BondOrder::Single,
395                    ));
396                }
397            }
398
399            if let (Some(ho5_idx), Some(o5_idx)) = (
400                residue.iter_atoms().position(|a| a.name == "HO5'"),
401                residue.iter_atoms().position(|a| a.name == "O5'"),
402            ) {
403                bonds.push(Bond::new(
404                    offset + ho5_idx,
405                    offset + o5_idx,
406                    BondOrder::Single,
407                ));
408            }
409        }
410
411        if residue.position == ResiduePosition::ThreePrime
412            && residue.standard_name.is_some_and(|s| s.is_nucleic())
413        {
414            let ho3_idx = residue.iter_atoms().position(|a| a.name == "HO3'");
415            let o3_idx = residue.iter_atoms().position(|a| a.name == "O3'");
416
417            if let (Some(h_idx), Some(o_idx)) = (ho3_idx, o3_idx) {
418                bonds.push(Bond::new(offset + h_idx, offset + o_idx, BondOrder::Single));
419            }
420        }
421
422        Ok(())
423    }
424
425    /// Adds a bond when the specified atoms are within the provided cutoff.
426    fn connect_atoms_if_close(
427        first: AtomLocator<'_>,
428        second: AtomLocator<'_>,
429        cutoff: f64,
430        order: BondOrder,
431        bonds: &mut Vec<Bond>,
432    ) {
433        if let (Some(idx1), Some(idx2)) = (
434            first
435                .residue
436                .iter_atoms()
437                .position(|a| a.name == first.atom_name),
438            second
439                .residue
440                .iter_atoms()
441                .position(|a| a.name == second.atom_name),
442        ) {
443            let p1 = first.residue.atoms()[idx1].pos;
444            let p2 = second.residue.atoms()[idx2].pos;
445
446            if nalgebra::distance_squared(&p1, &p2) <= cutoff * cutoff {
447                bonds.push(Bond::new(first.offset + idx1, second.offset + idx2, order));
448            }
449        }
450    }
451}
452
453/// Utility that couples a residue reference with its global atom offset.
454struct AtomLocator<'a> {
455    residue: &'a crate::model::residue::Residue,
456    offset: usize,
457    atom_name: &'a str,
458}
459
460impl<'a> AtomLocator<'a> {
461    /// Creates a locator describing a specific atom inside a residue.
462    fn new(residue: &'a crate::model::residue::Residue, offset: usize, atom_name: &'a str) -> Self {
463        Self {
464            residue,
465            offset,
466            atom_name,
467        }
468    }
469}
470
471#[cfg(test)]
472mod tests {
473    use super::*;
474    use crate::model::{
475        atom::Atom,
476        chain::Chain,
477        residue::Residue,
478        structure::Structure,
479        template::Template,
480        types::{Element, Point, ResidueCategory, ResiduePosition},
481    };
482    use nalgebra::Vector3;
483    use std::collections::HashSet;
484
485    fn structure_from_residues(residues: Vec<Residue>) -> Structure {
486        let mut chain = Chain::new("A");
487        for residue in residues {
488            chain.add_residue(residue);
489        }
490
491        let mut structure = Structure::new();
492        structure.add_chain(chain);
493        structure
494    }
495
496    fn standard_residue(name: &str, id: i32, position: ResiduePosition) -> Residue {
497        let template = db::get_template(name)
498            .unwrap_or_else(|| panic!("missing internal template for test residue '{name}'"));
499
500        let mut residue = Residue::new(
501            id,
502            None,
503            name,
504            Some(template.standard_name()),
505            ResidueCategory::Standard,
506        );
507        residue.position = position;
508
509        for (atom_name, element, point) in template.heavy_atoms() {
510            residue.add_atom(Atom::new(atom_name, element, point));
511        }
512
513        for (atom_name, point, _) in template.hydrogens() {
514            residue.add_atom(Atom::new(atom_name, Element::H, point));
515        }
516
517        residue
518    }
519
520    fn translate_residue(residue: &mut Residue, offset: Vector3<f64>) {
521        for atom in residue.iter_atoms_mut() {
522            atom.translate_by(&offset);
523        }
524    }
525
526    fn global_atom_index(
527        topology: &Topology,
528        chain_id: &str,
529        residue_id: i32,
530        atom_name: &str,
531    ) -> usize {
532        let mut idx = 0;
533        for chain in topology.structure().iter_chains() {
534            for residue in chain.iter_residues() {
535                for atom in residue.iter_atoms() {
536                    if chain.id == chain_id && residue.id == residue_id && atom.name == atom_name {
537                        return idx;
538                    }
539                    idx += 1;
540                }
541            }
542        }
543
544        panic!(
545            "atom '{}' not found in chain '{}' residue '{}'",
546            atom_name, chain_id, residue_id
547        );
548    }
549
550    fn has_bond(topology: &Topology, idx1: usize, idx2: usize, order: BondOrder) -> bool {
551        let (a1, a2) = if idx1 <= idx2 {
552            (idx1, idx2)
553        } else {
554            (idx2, idx1)
555        };
556        topology
557            .bonds()
558            .iter()
559            .any(|bond| bond.a1_idx == a1 && bond.a2_idx == a2 && bond.order == order)
560    }
561
562    fn five_prime_residue_with_phosphate_and_op3(id: i32) -> Residue {
563        let mut residue = standard_residue("DA", id, ResiduePosition::FivePrime);
564        let p_pos = residue.atom("P").unwrap().pos;
565        let op1_pos = residue.atom("OP1").unwrap().pos;
566        let op2_pos = residue.atom("OP2").unwrap().pos;
567        let o5_pos = residue.atom("O5'").unwrap().pos;
568        let centroid = (op1_pos.coords + op2_pos.coords + o5_pos.coords) / 3.0;
569        let direction = (p_pos.coords - centroid).normalize();
570        let op3_pos = p_pos + direction * 1.48;
571        residue.add_atom(Atom::new("OP3", Element::O, op3_pos));
572        residue
573    }
574
575    fn five_prime_residue_without_phosphate(id: i32) -> Residue {
576        let template = db::get_template("DA").unwrap();
577        let mut residue = Residue::new(
578            id,
579            None,
580            "DA",
581            Some(template.standard_name()),
582            ResidueCategory::Standard,
583        );
584        residue.position = ResiduePosition::FivePrime;
585
586        for (atom_name, element, point) in template.heavy_atoms() {
587            if !matches!(atom_name, "P" | "OP1" | "OP2") {
588                residue.add_atom(Atom::new(atom_name, element, point));
589            }
590        }
591
592        for (atom_name, point, _) in template.hydrogens() {
593            residue.add_atom(Atom::new(atom_name, Element::H, point));
594        }
595
596        let o5_pos = residue.atom("O5'").unwrap().pos;
597        let c5_pos = residue.atom("C5'").unwrap().pos;
598        let h_dir = (o5_pos - c5_pos).normalize();
599        let ho5_pos = o5_pos + h_dir * 0.96;
600        residue.add_atom(Atom::new("HO5'", Element::H, ho5_pos));
601
602        residue
603    }
604
605    #[test]
606    fn build_creates_peptide_bond_between_adjacent_proteins() {
607        let residue1 = standard_residue("GLY", 1, ResiduePosition::NTerminal);
608        let mut residue2 = standard_residue("ALA", 2, ResiduePosition::Internal);
609
610        let c_pos = residue1.atom("C").unwrap().pos;
611        let n_pos = residue2.atom("N").unwrap().pos;
612        let target_n = c_pos + Vector3::new(1.33, 0.0, 0.0);
613        translate_residue(&mut residue2, target_n - n_pos);
614
615        let structure = structure_from_residues(vec![residue1, residue2]);
616        let topology = TopologyBuilder::new()
617            .build(structure)
618            .expect("build topology");
619
620        let c_idx = global_atom_index(&topology, "A", 1, "C");
621        let n_idx = global_atom_index(&topology, "A", 2, "N");
622
623        assert!(has_bond(&topology, c_idx, n_idx, BondOrder::Single));
624    }
625
626    #[test]
627    fn build_creates_nucleic_backbone_bond() {
628        let residue1 = standard_residue("DA", 1, ResiduePosition::FivePrime);
629        let mut residue2 = standard_residue("DT", 2, ResiduePosition::ThreePrime);
630
631        let o3_pos = residue1.atom("O3'").unwrap().pos;
632        let p_pos = residue2.atom("P").unwrap().pos;
633        let target_p = o3_pos + Vector3::new(0.0, 0.0, 1.6);
634        translate_residue(&mut residue2, target_p - p_pos);
635
636        let structure = structure_from_residues(vec![residue1, residue2]);
637        let topology = TopologyBuilder::new()
638            .build(structure)
639            .expect("build topology");
640
641        let o3_idx = global_atom_index(&topology, "A", 1, "O3'");
642        let p_idx = global_atom_index(&topology, "A", 2, "P");
643
644        assert!(has_bond(&topology, o3_idx, p_idx, BondOrder::Single));
645    }
646
647    #[test]
648    fn build_adds_terminal_protein_bonds() {
649        let mut residue = standard_residue("ALA", 1, ResiduePosition::CTerminal);
650        let c_pos = residue.atom("C").unwrap().pos;
651        let oxt_pos = c_pos + Vector3::new(1.24, 0.0, 0.0);
652        let hxt_pos = oxt_pos + Vector3::new(0.96, 0.0, 0.0);
653
654        residue.add_atom(Atom::new("OXT", Element::O, oxt_pos));
655        residue.add_atom(Atom::new("HXT", Element::H, hxt_pos));
656
657        let structure = structure_from_residues(vec![residue]);
658        let topology = TopologyBuilder::new()
659            .build(structure)
660            .expect("build topology");
661
662        let c_idx = global_atom_index(&topology, "A", 1, "C");
663        let oxt_idx = global_atom_index(&topology, "A", 1, "OXT");
664        let hxt_idx = global_atom_index(&topology, "A", 1, "HXT");
665
666        assert!(has_bond(&topology, c_idx, oxt_idx, BondOrder::Single));
667        assert!(has_bond(&topology, oxt_idx, hxt_idx, BondOrder::Single));
668    }
669
670    #[test]
671    fn build_errors_when_required_standard_atom_missing() {
672        let mut residue = standard_residue("GLY", 1, ResiduePosition::Internal);
673        assert!(residue.remove_atom("CA").is_some(), "expected CA atom");
674
675        let structure = structure_from_residues(vec![residue]);
676        let err = TopologyBuilder::new().build(structure).unwrap_err();
677
678        match err {
679            Error::TopologyAtomMissing { atom_name, .. } => assert_eq!(atom_name, "CA"),
680            other => panic!("unexpected error: {other:?}"),
681        }
682    }
683
684    #[test]
685    fn build_errors_when_hetero_template_missing() {
686        let mut residue = Residue::new(1, None, "LIG", None, ResidueCategory::Hetero);
687        residue.add_atom(Atom::new("C1", Element::C, Point::origin()));
688
689        let structure = structure_from_residues(vec![residue]);
690        let err = TopologyBuilder::new().build(structure).unwrap_err();
691
692        match err {
693            Error::MissingHeteroTemplate { res_name } => assert_eq!(res_name, "LIG"),
694            other => panic!("unexpected error: {other:?}"),
695        }
696    }
697
698    #[test]
699    fn build_uses_hetero_template_for_hetero_residue() {
700        let template = Template::new(
701            "LIG",
702            vec!["C1".into(), "O1".into(), "H1".into()],
703            vec![
704                ("C1".into(), "O1".into(), BondOrder::Double),
705                ("C1".into(), "H1".into(), BondOrder::Single),
706            ],
707        );
708
709        let mut residue = Residue::new(1, None, "LIG", None, ResidueCategory::Hetero);
710        residue.add_atom(Atom::new("C1", Element::C, Point::new(0.0, 0.0, 0.0)));
711        residue.add_atom(Atom::new("O1", Element::O, Point::new(1.2, 0.0, 0.0)));
712        residue.add_atom(Atom::new("H1", Element::H, Point::new(-1.0, 0.0, 0.0)));
713
714        let structure = structure_from_residues(vec![residue]);
715        let builder = TopologyBuilder::new().add_hetero_template(template);
716        let topology = builder.build(structure).expect("build topology");
717
718        let c_idx = global_atom_index(&topology, "A", 1, "C1");
719        let o_idx = global_atom_index(&topology, "A", 1, "O1");
720        let h_idx = global_atom_index(&topology, "A", 1, "H1");
721
722        assert!(has_bond(&topology, c_idx, o_idx, BondOrder::Double));
723        assert!(has_bond(&topology, c_idx, h_idx, BondOrder::Single));
724    }
725
726    #[test]
727    fn build_creates_disulfide_bond_when_sg_within_cutoff() {
728        let residue1 = standard_residue("CYX", 1, ResiduePosition::Internal);
729        let mut residue2 = standard_residue("CYX", 2, ResiduePosition::Internal);
730
731        let sg1_pos = residue1.atom("SG").unwrap().pos;
732        let sg2_pos = residue2.atom("SG").unwrap().pos;
733        let target = sg1_pos + Vector3::new(2.0, 0.0, 0.0);
734        translate_residue(&mut residue2, target - sg2_pos);
735
736        let structure = structure_from_residues(vec![residue1, residue2]);
737        let topology = TopologyBuilder::new()
738            .build(structure)
739            .expect("build topology");
740
741        let sg1_idx = global_atom_index(&topology, "A", 1, "SG");
742        let sg2_idx = global_atom_index(&topology, "A", 2, "SG");
743
744        assert!(has_bond(&topology, sg1_idx, sg2_idx, BondOrder::Single));
745    }
746
747    #[test]
748    fn build_avoids_duplicate_bonds_for_standard_residue() {
749        let residue = standard_residue("ALA", 1, ResiduePosition::Internal);
750        let structure = structure_from_residues(vec![residue]);
751        let topology = TopologyBuilder::new()
752            .build(structure)
753            .expect("build topology");
754
755        let mut seen = HashSet::new();
756        for bond in topology.bonds() {
757            let key = (bond.a1_idx, bond.a2_idx, bond.order);
758            assert!(seen.insert(key), "duplicate bond detected: {key:?}");
759        }
760    }
761
762    #[test]
763    fn build_sets_expected_water_degree() {
764        let residue = standard_residue("HOH", 1, ResiduePosition::Internal);
765        let structure = structure_from_residues(vec![residue]);
766        let topology = TopologyBuilder::new()
767            .build(structure)
768            .expect("build topology");
769
770        let o_idx = global_atom_index(&topology, "A", 1, "O");
771        let neighbors: Vec<_> = topology.neighbors_of(o_idx).collect();
772
773        assert_eq!(neighbors.len(), 2, "water oxygen should have two neighbors");
774
775        let mut uniq = HashSet::new();
776        for idx in neighbors {
777            assert!(uniq.insert(idx), "neighbor duplicated for water oxygen");
778        }
779    }
780
781    #[test]
782    fn build_creates_p_op3_bond_for_5prime_phosphate() {
783        let residue = five_prime_residue_with_phosphate_and_op3(1);
784        let structure = structure_from_residues(vec![residue]);
785        let topology = TopologyBuilder::new()
786            .build(structure)
787            .expect("build topology");
788
789        let p_idx = global_atom_index(&topology, "A", 1, "P");
790        let op3_idx = global_atom_index(&topology, "A", 1, "OP3");
791
792        assert!(has_bond(&topology, p_idx, op3_idx, BondOrder::Single));
793    }
794
795    #[test]
796    fn build_creates_op3_hop3_bond_when_protonated() {
797        let mut residue = five_prime_residue_with_phosphate_and_op3(1);
798        let op3_pos = residue.atom("OP3").unwrap().pos;
799        let p_pos = residue.atom("P").unwrap().pos;
800        let h_dir = (op3_pos - p_pos).normalize();
801        let hop3_pos = op3_pos + h_dir * 0.96;
802        residue.add_atom(Atom::new("HOP3", Element::H, hop3_pos));
803
804        let structure = structure_from_residues(vec![residue]);
805        let topology = TopologyBuilder::new()
806            .build(structure)
807            .expect("build topology");
808
809        let op3_idx = global_atom_index(&topology, "A", 1, "OP3");
810        let hop3_idx = global_atom_index(&topology, "A", 1, "HOP3");
811
812        assert!(has_bond(&topology, op3_idx, hop3_idx, BondOrder::Single));
813    }
814
815    #[test]
816    fn build_creates_o5_ho5_bond_for_5prime_hydroxyl() {
817        let residue = five_prime_residue_without_phosphate(1);
818        let structure = structure_from_residues(vec![residue]);
819        let topology = TopologyBuilder::new()
820            .build(structure)
821            .expect("build topology");
822
823        let o5_idx = global_atom_index(&topology, "A", 1, "O5'");
824        let ho5_idx = global_atom_index(&topology, "A", 1, "HO5'");
825
826        assert!(has_bond(&topology, o5_idx, ho5_idx, BondOrder::Single));
827    }
828
829    #[test]
830    fn optional_terminal_atoms_includes_nucleic_5prime_special_atoms() {
831        let residue_with_p = five_prime_residue_with_phosphate_and_op3(1);
832
833        assert!(TopologyBuilder::is_optional_terminal_atom(
834            &residue_with_p,
835            "OP3"
836        ));
837        assert!(TopologyBuilder::is_optional_terminal_atom(
838            &residue_with_p,
839            "HOP3"
840        ));
841        assert!(TopologyBuilder::is_optional_terminal_atom(
842            &residue_with_p,
843            "HOP2"
844        ));
845        assert!(!TopologyBuilder::is_optional_terminal_atom(
846            &residue_with_p,
847            "P"
848        ));
849        assert!(!TopologyBuilder::is_optional_terminal_atom(
850            &residue_with_p,
851            "OP1"
852        ));
853
854        let residue_no_p = five_prime_residue_without_phosphate(2);
855        assert!(TopologyBuilder::is_optional_terminal_atom(
856            &residue_no_p,
857            "P"
858        ));
859        assert!(TopologyBuilder::is_optional_terminal_atom(
860            &residue_no_p,
861            "OP1"
862        ));
863        assert!(TopologyBuilder::is_optional_terminal_atom(
864            &residue_no_p,
865            "OP2"
866        ));
867        assert!(TopologyBuilder::is_optional_terminal_atom(
868            &residue_no_p,
869            "HO5'"
870        ));
871    }
872}