Skip to main content

cosmolkit_core/bio/
mod.rs

1//! Biomolecular structure primitives.
2//!
3//! `BioStructure` is a flat-row, hierarchy-indexed representation for proteins,
4//! DNA, RNA, and complexes. It is NOT a giant `Molecule`; it is a hierarchy +
5//! coordinate + assembly object. See `dev/BioStructureOperationContractDesign.md`.
6//!
7//! Gemmi marker convention is defined in `dev/source_reproduction_protocol.md`.
8
9use std::collections::HashMap;
10use std::marker::PhantomData;
11
12pub mod invariants;
13pub mod ops;
14pub mod protein;
15pub mod resinfo;
16
17// ---------------------------------------------------------------------------
18// Stable row IDs
19// ---------------------------------------------------------------------------
20
// Generates a newtype row id named `$name` that wraps a private `u32` index.
// Each expansion is a distinct type, so ids of different row kinds cannot be
// interchanged without going through `new` / `index` explicitly.
macro_rules! row_id {
    ($name:ident) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $name(u32);

        impl $name {
            /// Wraps a raw row index.
            #[must_use]
            pub const fn new(index: u32) -> Self {
                Self(index)
            }

            /// Returns the wrapped raw row index.
            #[must_use]
            pub const fn index(self) -> u32 {
                self.0
            }
        }
    };
}

// One id type per row kind. In this file `AtomId`, `ResidueId`, and `ChainId`
// are used as `RowSpan` markers and as lookup keys on `BioStructure`;
// `EntityId` and `ModelId` appear as row fields. `BondId`, `AssemblyId`, and
// `AltLocGroupId` are not referenced in this file.
row_id!(AtomId);
row_id!(ResidueId);
row_id!(ChainId);
row_id!(EntityId);
row_id!(ModelId);
row_id!(BondId);
row_id!(AssemblyId);
row_id!(AltLocGroupId);
48
49// ---------------------------------------------------------------------------
50// RowSpan — contiguous child range within a parent block
51// ---------------------------------------------------------------------------
52
/// Contiguous range of child rows belonging to one parent row.
///
/// `T` only names the child id type (for example `RowSpan<AtomId>`); no value
/// of `T` is stored, it is carried through `PhantomData`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RowSpan<T> {
    /// Index of the first child row in the span.
    pub start: u32,
    /// Number of child rows in the span.
    pub len: u32,
    _marker: PhantomData<T>,
}

impl<T> RowSpan<T> {
    /// Builds a span covering `len` rows beginning at `start`.
    #[must_use]
    pub const fn new(start: u32, len: u32) -> Self {
        Self {
            start,
            len,
            _marker: PhantomData,
        }
    }

    /// Returns the one-past-the-last row index of the span.
    ///
    /// The sum saturates at `u32::MAX`. `start` and `len` are public, so a
    /// malformed span must not panic (debug builds) or wrap around to a value
    /// below `start` (release builds).
    #[must_use]
    pub const fn end(self) -> u32 {
        self.start.saturating_add(self.len)
    }

    /// Returns `true` when the span covers no rows.
    #[must_use]
    pub const fn is_empty(self) -> bool {
        self.len == 0
    }
}
80
81// ---------------------------------------------------------------------------
82// Polymer classification enums
83// ---------------------------------------------------------------------------
84
/// Classification stored per residue (`ResidueRow::kind`).
///
/// `classify_residue_name` in this module only ever produces `AminoAcid`,
/// `Water`, or `Unknown`; the remaining variants are assigned elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResidueKind {
    AminoAcid,
    DNA,
    RNA,
    Saccharide,
    Water,
    Ligand,
    Ion,
    Cofactor,
    /// No classification could be determined.
    Unknown,
}
97
/// Polymer classification stored per entity (`EntityRow::polymer_kind`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PolymerKind {
    Peptide,
    DNA,
    RNA,
    PeptideLike,
    NucleicAcidHybrid,
    Saccharide,
    NonPolymer,
    Water,
    /// No classification could be determined.
    Unknown,
}
110
/// Coarse entity classification, stored on both `EntityRow::kind` and
/// `ResidueRow::entity_kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntityKind {
    Polymer,
    NonPolymer,
    Branched,
    Water,
    /// No classification could be determined.
    Unknown,
}
119
/// Chain-level content classification stored in `ChainRow::kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChainKind {
    Protein,
    DNA,
    RNA,
    ProteinDNAComplex,
    ProteinRNAComplex,
    LigandOnly,
    WaterOnly,
    Mixed,
    /// No classification could be determined.
    Unknown,
}
132
133// ---------------------------------------------------------------------------
134// Source identifier types (PDB/mmCIF provenance, NOT row ids)
135// ---------------------------------------------------------------------------
136
/// PDB atom serial number (source provenance only).
///
/// This is the number recorded in the source file, not an `AtomId` row index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PdbAtomSerial(pub i32);
140
/// PDB/mmCIF chain identifier string (up to 4 bytes).
///
/// Field `0` is the fixed byte buffer; field `1` is the number of leading
/// bytes of that buffer that are in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PdbChainId(pub [u8; 4], pub u8);

impl PdbChainId {
    /// Returns the identifier as text.
    ///
    /// Yields `""` when the used bytes are not valid UTF-8 or when the stored
    /// length exceeds the buffer. Both fields are public, so an out-of-range
    /// length is handled like any other malformed value instead of panicking
    /// on the slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0
            .get(..usize::from(self.1))
            .and_then(|used| std::str::from_utf8(used).ok())
            .unwrap_or("")
    }
}
151
/// PDB residue sequence number + insertion code.
///
/// The derived `Default` is sequence number `0` with no insertion code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct PdbSeqId {
    /// Residue sequence number as given by the source.
    pub seq_num: i32,
    /// Insertion code byte, or `None` when the residue has none.
    pub ins_code: Option<u8>,
}
158
159// ---------------------------------------------------------------------------
160// Atom / residue / chain / model name types
161// ---------------------------------------------------------------------------
162
/// Up to 4-char atom name (e.g. " CA ", " N  ").
///
/// Stored as a bare 4-byte buffer with no separate length field.
// NOTE(review): the examples suggest space padding as in PDB columns; the
// padding convention is not established in this file — confirm with the reader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AtomName(pub [u8; 4]);
166
/// Residue name held in a 4-byte buffer (e.g. "ALA", "GLY").
///
/// Field `0` is the fixed byte buffer; field `1` is the number of leading
/// bytes of that buffer that are in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResidueName(pub [u8; 4], pub u8);

impl ResidueName {
    /// Returns the residue name as text.
    ///
    /// Yields `""` when the used bytes are not valid UTF-8 or when the stored
    /// length exceeds the buffer. Both fields are public, so an out-of-range
    /// length is handled like any other malformed value instead of panicking
    /// on the slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0
            .get(..usize::from(self.1))
            .and_then(|used| std::str::from_utf8(used).ok())
            .unwrap_or("")
    }
}
177
178#[must_use]
179pub fn classify_residue_name(name: ResidueName) -> ResidueKind {
180    let info = resinfo::find_tabulated_residue(name.as_str());
181    if info.is_amino_acid() {
182        ResidueKind::AminoAcid
183    } else if info.is_water() {
184        ResidueKind::Water
185    } else {
186        ResidueKind::Unknown
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    /// Packs `text` into a `ResidueName`; `text` must fit the 4-byte buffer.
    fn residue_name(text: &str) -> ResidueName {
        let raw = text.as_bytes();
        let mut buffer = [0; 4];
        buffer[..raw.len()].copy_from_slice(raw);
        ResidueName(buffer, raw.len() as u8)
    }

    /// Every amino-acid entry of the residue table classifies as `AminoAcid`,
    /// and the table holds exactly 128 such entries.
    #[test]
    fn classifies_complete_gemmi_amino_acid_vocabulary() {
        let amino_acids: Vec<_> = resinfo::RESIDUE_INFO_TABLE
            .iter()
            .filter(|entry| entry.is_amino_acid())
            .collect();
        assert_eq!(amino_acids.len(), 128);
        amino_acids.iter().for_each(|entry| {
            assert_eq!(
                classify_residue_name(residue_name(entry.name)),
                ResidueKind::AminoAcid
            );
        });
    }

    /// The four water names classify as `Water`; an untabulated name stays
    /// `Unknown`.
    #[test]
    fn classifies_gemmi_water_names_without_guessing_other_residues() {
        let water_names = ["HOH", "DOD", "WAT", "H2O"];
        water_names.iter().for_each(|water| {
            assert_eq!(
                classify_residue_name(residue_name(water)),
                ResidueKind::Water
            );
        });
        assert_eq!(
            classify_residue_name(residue_name("XYZ")),
            ResidueKind::Unknown
        );
    }
}
229
/// Single-char altloc label (e.g. b'A', b'B').
///
/// Stored as one raw byte; "no altloc" is expressed by the surrounding
/// `Option<AltLocLabel>` fields rather than by a sentinel byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AltLocLabel(pub u8);
233
234// ---------------------------------------------------------------------------
235// Source identifier bundles (per row)
236// ---------------------------------------------------------------------------
237
/// Source-file identifiers kept alongside an `AtomRow`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AtomSourceIds {
    /// Atom serial number from the source, when one was recorded.
    pub serial: Option<PdbAtomSerial>,
}
242
/// Per-atom calculation flag stored in `AtomRow::calc_flag`.
// NOTE(review): the variant set matches Gemmi's `CalcFlag` / mmCIF
// `_atom_site.calc_flag`; the exact source mapping lives in the readers — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioCalcFlag {
    /// Default: no flag recorded.
    #[default]
    NotSet,
    NoHydrogen,
    Determined,
    Calculated,
    Dummy,
}
252
/// Source-file identifiers kept alongside a `ResidueRow`.
///
/// Every field is optional; `None` means the source did not provide it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResidueSourceIds {
    /// Sequence number + insertion code.
    pub seq_id: Option<PdbSeqId>,
    // NOTE(review): presumably mmCIF `label_seq_id` — confirm against the reader.
    pub label_seq_id: Option<i32>,
    /// Segment identifier as a raw 4-byte buffer.
    pub segment_id: Option<[u8; 4]>,
    pub subchain_id: Option<PdbChainId>,
    /// Row id of the entity this residue is labelled with.
    pub label_entity_id: Option<EntityId>,
}
261
/// Source-file identifiers kept alongside a `ChainRow`.
// NOTE(review): field names follow the mmCIF `auth_asym_id` / `label_asym_id`
// split (author-assigned vs. archive-assigned chain name) — confirm in reader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChainSourceIds {
    pub auth_chain_id: Option<PdbChainId>,
    pub label_asym_id: Option<PdbChainId>,
}
267
/// Source-file identifiers kept alongside an `EntityRow`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EntitySourceIds {
    /// Entity identifier text as given by the source (not an `EntityId` row index).
    pub source_entity_id: String,
}
272
273// ---------------------------------------------------------------------------
274// Flat row types
275// ---------------------------------------------------------------------------
276
/// One atom in the flat atom table of a `BioStructure`.
///
/// Coordinates are not stored here; they live in `CoordinateBlock`, indexed by
/// the same `AtomId`.
#[derive(Debug, Clone, PartialEq)]
pub struct AtomRow {
    /// Row id of the residue that owns this atom.
    pub residue_id: ResidueId,
    pub name: AtomName,
    pub element: crate::Element,
    /// Alternate-location label, or `None` when the atom has none.
    pub altloc: Option<AltLocLabel>,
    pub occupancy: Option<f32>,
    /// Isotropic B-factor.
    pub b_iso: Option<f32>,
    pub formal_charge: Option<i8>,
    /// Six anisotropic displacement components.
    // NOTE(review): component order and units are not established here — confirm.
    pub anisou: Option<[f32; 6]>,
    pub calc_flag: BioCalcFlag,
    // NOTE(review): presumably refers to `BioTlsGroup::num_id` — confirm.
    pub tls_group_id: Option<i16>,
    // NOTE(review): likely the deuterium fraction (see `BioStructure::has_d_fraction`) — confirm.
    pub fraction: Option<f32>,
    /// Source-file provenance for this atom.
    pub source: AtomSourceIds,
}
292
/// One residue in the flat residue table of a `BioStructure`.
#[derive(Debug, Clone, PartialEq)]
pub struct ResidueRow {
    /// Row id of the chain that owns this residue.
    pub chain_id: ChainId,
    /// Contiguous range of this residue's atoms in the atom table.
    pub atom_span: RowSpan<AtomId>,
    pub name: ResidueName,
    pub kind: ResidueKind,
    pub entity_kind: EntityKind,
    // NOTE(review): presumably a one-character ATOM/HETATM marker as in Gemmi — confirm.
    pub het_flag: Option<char>,
    /// Source-file provenance for this residue.
    pub source: ResidueSourceIds,
    /// SIFTS UniProt mapping for this residue, when available.
    pub sifts_unp: Option<BioSiftsUnpResidue>,
}
304
/// One chain in the flat chain table of a `BioStructure`.
#[derive(Debug, Clone, PartialEq)]
pub struct ChainRow {
    /// Row id of the model that owns this chain.
    pub model_id: ModelId,
    /// Row id of the associated entity, when one is known.
    pub entity_id: Option<EntityId>,
    /// Contiguous range of this chain's residues in the residue table.
    pub residue_span: RowSpan<ResidueId>,
    pub kind: ChainKind,
    /// Source-file provenance for this chain.
    pub source: ChainSourceIds,
}
313
/// One entity in the flat entity table of a `BioStructure`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EntityRow {
    pub kind: EntityKind,
    pub polymer_kind: PolymerKind,
    // NOTE(review): presumably mirrors Gemmi's `Entity::reflects_microhetero` — confirm.
    pub reflects_microhetero: bool,
    /// Sequence entries, one string per position.
    // NOTE(review): entry format (e.g. comma-separated microheterogeneity) is not shown here.
    pub sequence: Vec<String>,
    /// Database cross-references for this entity.
    pub dbrefs: Vec<BioEntityDbRef>,
    // NOTE(review): presumably SIFTS UniProt accessions, indexed by
    // `BioSiftsUnpResidue::acc_index` — confirm.
    pub sifts_unp_acc: Vec<String>,
    pub subchains: Vec<PdbChainId>,
    /// Source-file provenance for this entity.
    pub source: EntitySourceIds,
}
325
/// Database cross-reference attached to an entity (`EntityRow::dbrefs`).
///
/// The derived `Default` leaves every string empty and every sequence id at
/// `PdbSeqId::default()`.
// NOTE(review): field set matches Gemmi's `Entity::DbRef` (PDB DBREF records) — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BioEntityDbRef {
    pub db_name: String,
    pub accession_code: String,
    pub id_code: String,
    pub isoform: String,
    /// First and last residue of the referenced range in the structure.
    pub seq_begin: PdbSeqId,
    pub seq_end: PdbSeqId,
    /// First and last residue of the referenced range in the database.
    pub db_begin: PdbSeqId,
    pub db_end: PdbSeqId,
    pub label_seq_begin: Option<i32>,
    pub label_seq_end: Option<i32>,
}
339
/// SIFTS UniProt mapping for one residue (`ResidueRow::sifts_unp`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct BioSiftsUnpResidue {
    /// Mapped residue character, when present.
    pub res: Option<char>,
    // NOTE(review): presumably an index into `EntityRow::sifts_unp_acc` — confirm.
    pub acc_index: u8,
    // NOTE(review): presumably the residue number in the UniProt sequence — confirm.
    pub num: u16,
}
346
/// Modified-residue record (`BioStructure::mod_residues`).
// NOTE(review): field set matches Gemmi's `ModRes` (PDB MODRES) — confirm.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BioModRes {
    pub chain_name: String,
    pub res_id: PdbSeqId,
    /// Component id of the parent (unmodified) residue.
    pub parent_comp_id: String,
    pub mod_id: String,
    pub details: String,
}
355
/// One model in the flat model table of a `BioStructure`.
#[derive(Debug, Clone, PartialEq)]
pub struct ModelRow {
    /// Contiguous range of this model's chains in the chain table.
    pub chain_span: RowSpan<ChainId>,
    /// Model number as given by the source, when one was recorded.
    pub source_model_number: Option<i32>,
}
361
/// Entry-level metadata carried by a `BioStructure`.
///
/// The derived `Default` leaves every option `None` and every list empty.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioMetadata {
    pub entry_id: Option<String>,
    pub title: Option<String>,
    pub pdbx_keywords: Option<String>,
    pub keywords: Option<String>,
    pub experimental_method: Option<String>,
    /// Initial deposition date, kept as source text (not parsed into a date type).
    pub received_initial_deposition_date: Option<String>,
    pub authors: Vec<String>,
    pub software: Vec<BioSoftwareItem>,
    pub refinement: Vec<BioRefinementInfo>,
    pub experiments: Vec<BioExperimentInfo>,
    pub experiment_crystals: Vec<BioExperimentCrystalInfo>,
    pub solved_by: Option<String>,
    pub starting_model: Option<String>,
    // NOTE(review): presumably the free text of PDB REMARK 300 — confirm.
    pub remark_300_detail: Option<String>,
}
379
/// Role a software package played, stored in `BioSoftwareItem::classification`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioSoftwareClassification {
    DataCollection,
    DataExtraction,
    DataProcessing,
    DataReduction,
    DataScaling,
    ModelBuilding,
    Phasing,
    Refinement,
    /// Default: no classification given.
    #[default]
    Unspecified,
}
393
/// One software package listed in `BioMetadata::software`.
///
/// All text fields are plain strings; an empty string is the derived default.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioSoftwareItem {
    pub name: String,
    pub version: String,
    /// Date kept as source text (not parsed into a date type).
    pub date: String,
    pub description: String,
    pub contact_author: String,
    pub contact_author_email: String,
    pub classification: BioSoftwareClassification,
}
404
/// Refinement statistics for one resolution bin (`BioRefinementInfo::bins`).
///
/// Every statistic is optional; `None` is the derived default for all of them.
/// The same statistic fields also appear on `BioRefinementInfo` itself.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioRefinementBin {
    pub resolution_high: Option<f64>,
    pub resolution_low: Option<f64>,
    pub completeness: Option<f64>,
    pub reflection_count: Option<i32>,
    pub work_set_count: Option<i32>,
    pub rfree_set_count: Option<i32>,
    pub r_all: Option<f64>,
    pub r_work: Option<f64>,
    pub r_free: Option<f64>,
    pub cc_fo_fc_work: Option<f64>,
    pub cc_fo_fc_free: Option<f64>,
    pub fsc_work: Option<f64>,
    pub fsc_free: Option<f64>,
    pub cc_intensity_work: Option<f64>,
    pub cc_intensity_free: Option<f64>,
}
423
/// One restraint statistic entry (`BioRefinementInfo::restr_stats`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioRefinementRestraint {
    pub name: String,
    pub count: Option<i32>,
    pub weight: Option<f64>,
    pub function: String,
    // NOTE(review): presumably the deviation from ideal geometry — confirm.
    pub dev_ideal: Option<f64>,
}
432
/// One residue-range selection belonging to a TLS group
/// (`BioTlsGroup::selections`).
///
/// The range bounds are kept as source text rather than parsed sequence ids.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioTlsSelection {
    pub chain: String,
    pub res_begin: String,
    pub res_end: String,
    pub details: String,
}
440
441#[derive(Debug, Clone, PartialEq)]
442pub struct BioTlsGroup {
443    pub num_id: Option<i16>,
444    pub id: String,
445    pub selections: Vec<BioTlsSelection>,
446    pub origin: [f64; 3],
447    pub t: [[f64; 3]; 3],
448    pub l: [[f64; 3]; 3],
449    pub s: [[f64; 3]; 3],
450}
451
452impl Default for BioTlsGroup {
453    fn default() -> Self {
454        Self {
455            num_id: None,
456            id: String::new(),
457            selections: Vec::new(),
458            origin: [f64::NAN; 3],
459            t: [[f64::NAN; 3]; 3],
460            l: [[f64::NAN; 3]; 3],
461            s: [[f64::NAN; 3]; 3],
462        }
463    }
464}
465
/// Six anisotropic components stored in `BioRefinementInfo::aniso_b`.
///
/// Unset components are represented by NaN (see the `Default` impl), so two
/// default-constructed values do not compare equal under the derived
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq)]
pub struct BioAnisotropicB {
    pub u11: f64,
    pub u22: f64,
    pub u33: f64,
    pub u12: f64,
    pub u13: f64,
    pub u23: f64,
}

impl Default for BioAnisotropicB {
    /// Creates a value with every component set to NaN.
    fn default() -> Self {
        const UNSET: f64 = f64::NAN;
        Self {
            u11: UNSET,
            u22: UNSET,
            u33: UNSET,
            u12: UNSET,
            u13: UNSET,
            u23: UNSET,
        }
    }
}
488
/// One refinement record listed in `BioMetadata::refinement`.
///
/// Overall statistics share their field names with `BioRefinementBin`, which
/// holds the same quantities per resolution bin.
///
/// Note: the derived `Default` sets `aniso_b` to all-NaN, so two default
/// values do not compare equal under the derived `PartialEq`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioRefinementInfo {
    pub id: String,
    pub resolution_high: Option<f64>,
    pub resolution_low: Option<f64>,
    pub completeness: Option<f64>,
    pub reflection_count: Option<i32>,
    pub work_set_count: Option<i32>,
    pub rfree_set_count: Option<i32>,
    pub r_all: Option<f64>,
    pub r_work: Option<f64>,
    pub r_free: Option<f64>,
    pub cross_validation_method: String,
    pub rfree_selection_method: String,
    /// Bin count as reported by the source; stored separately from `bins`.
    pub bin_count: Option<i32>,
    /// Per-resolution-bin statistics.
    pub bins: Vec<BioRefinementBin>,
    pub mean_b: Option<f64>,
    pub aniso_b: BioAnisotropicB,
    pub luzzati_error: Option<f64>,
    pub dpi_blow_r: Option<f64>,
    pub dpi_blow_rfree: Option<f64>,
    pub dpi_cruickshank_r: Option<f64>,
    pub dpi_cruickshank_rfree: Option<f64>,
    pub cc_fo_fc_work: Option<f64>,
    pub cc_fo_fc_free: Option<f64>,
    pub fsc_work: Option<f64>,
    pub fsc_free: Option<f64>,
    pub cc_intensity_work: Option<f64>,
    pub cc_intensity_free: Option<f64>,
    /// Restraint statistics.
    pub restr_stats: Vec<BioRefinementRestraint>,
    pub tls_groups: Vec<BioTlsGroup>,
    /// Free-text remarks.
    pub remarks: String,
}
522
/// Reflection statistics, used both for the overall data set
/// (`BioExperimentInfo::reflections`) and per shell (`BioExperimentInfo::shells`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioReflectionsInfo {
    pub resolution_high: Option<f64>,
    pub resolution_low: Option<f64>,
    pub completeness: Option<f64>,
    pub redundancy: Option<f64>,
    pub r_merge: Option<f64>,
    pub r_sym: Option<f64>,
    pub mean_i_over_sigma: Option<f64>,
}
533
/// One experiment record listed in `BioMetadata::experiments`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioExperimentInfo {
    pub method: String,
    pub number_of_crystals: Option<i32>,
    pub unique_reflections: Option<i32>,
    // NOTE(review): presumably matches `BioDiffractionInfo::id` values — confirm.
    pub diffraction_ids: Vec<String>,
    /// Overall reflection statistics.
    pub reflections: BioReflectionsInfo,
    pub b_wilson: Option<f64>,
    /// Per-shell reflection statistics.
    pub shells: Vec<BioReflectionsInfo>,
}
544
/// One diffraction record listed in `BioExperimentCrystalInfo::diffractions`.
///
/// Most fields are free text taken from the source; an empty string is the
/// derived default.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioDiffractionInfo {
    pub id: String,
    /// Collection date kept as source text (not parsed into a date type).
    pub collection_date: String,
    // NOTE(review): unit not established in this file (Kelvin in PDB/mmCIF) — confirm.
    pub temperature: Option<f64>,
    pub source: String,
    pub source_type: String,
    pub synchrotron: String,
    pub beamline: String,
    /// Wavelength(s) kept as a single source string.
    pub wavelengths: String,
    pub scattering_type: String,
    pub monochromator: String,
    pub optics: String,
    pub detector: String,
    pub detector_make: String,
    // NOTE(review): presumably a one-character monochromatic/Laue marker — confirm.
    pub mono_or_laue: Option<char>,
}
562
/// One crystal record listed in `BioMetadata::experiment_crystals`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioExperimentCrystalInfo {
    pub id: String,
    pub description: String,
    pub ph: Option<f64>,
    /// pH range kept as source text.
    pub ph_range: String,
    /// Diffraction records associated with this crystal.
    pub diffractions: Vec<BioDiffractionInfo>,
}
571
/// Asymmetric-unit relation recorded on a connection (`BioConnection::asu`).
// NOTE(review): presumably whether both partners lie in the same asymmetric
// unit, as in Gemmi's `Asu` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioAsu {
    /// Default: no constraint recorded.
    #[default]
    Any,
    Same,
    Different,
}
579
/// Textual address of an atom: chain name, residue sequence id, atom name,
/// and optional altloc.
///
/// Used by the annotation records in this module (connections, cis-peptides,
/// helices, sheet strands) instead of row ids.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BioAtomAddress {
    pub chain_name: String,
    pub seq_id: Option<PdbSeqId>,
    pub atom_name: String,
    pub altloc: Option<AltLocLabel>,
}
587
/// Kind of connection stored in `BioConnection::type_`.
// NOTE(review): names suggest disulfide / covalent / metal-coordination as in
// mmCIF `struct_conn.conn_type_id` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioConnectionType {
    /// Default variant.
    #[default]
    Disulf,
    Covale,
    MetalC,
}
595
/// One connection between two addressed atoms (`BioStructure::connections`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioConnection {
    pub name: String,
    /// Connection kind (trailing underscore because `type` is a keyword).
    pub type_: BioConnectionType,
    pub partner1: BioAtomAddress,
    pub partner2: BioAtomAddress,
    pub asu: BioAsu,
    // NOTE(review): layout of the four values is not established here
    // (Gemmi stores a symmetry-operator index plus three cell shifts) — confirm.
    pub reported_sym: [i16; 4],
    /// Distance as reported by the source, when given.
    pub reported_distance: Option<f32>,
    pub link_id: String,
}
607
/// One cis-peptide record (`BioStructure::cispeps`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioCisPep {
    // NOTE(review): presumably the C-side and N-side residues of the peptide bond — confirm.
    pub partner_c: BioAtomAddress,
    pub partner_n: BioAtomAddress,
    /// Model number the record applies to; `0` is the derived default.
    pub model_num: i32,
    pub only_altloc: Option<AltLocLabel>,
    /// Angle as reported by the source, when given.
    pub reported_angle: Option<f32>,
}
616
/// Helix class stored in `BioHelix::helix_class`.
///
/// `BioHelix::set_helix_class_as_int` maps the integers 1 through 10 onto
/// `RAlpha` through `HelixPolyProlineNone` in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioHelixClass {
    /// Default, and the result for any integer code outside 1..=10.
    #[default]
    UnknownHelix,
    RAlpha,
    ROmega,
    RPi,
    RGamma,
    R310,
    LAlpha,
    LOmega,
    LGamma,
    Helix27,
    HelixPolyProlineNone,
}
632
/// One helix annotation (`BioStructure::helices`), delimited by two atom
/// addresses.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioHelix {
    pub start: BioAtomAddress,
    pub end: BioAtomAddress,
    pub helix_class: BioHelixClass,
    /// Helix length as recorded; `0` is the derived default.
    pub length: i32,
}
640
641impl BioHelix {
642    pub fn set_helix_class_as_int(&mut self, n: i32) {
643        self.helix_class = match n {
644            1 => BioHelixClass::RAlpha,
645            2 => BioHelixClass::ROmega,
646            3 => BioHelixClass::RPi,
647            4 => BioHelixClass::RGamma,
648            5 => BioHelixClass::R310,
649            6 => BioHelixClass::LAlpha,
650            7 => BioHelixClass::LOmega,
651            8 => BioHelixClass::LGamma,
652            9 => BioHelixClass::Helix27,
653            10 => BioHelixClass::HelixPolyProlineNone,
654            _ => BioHelixClass::UnknownHelix,
655        };
656    }
657}
658
/// One strand of a sheet annotation (`BioSheet::strands`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioSheetStrand {
    pub start: BioAtomAddress,
    pub end: BioAtomAddress,
    // NOTE(review): presumably the registration hydrogen-bond atoms of the PDB
    // SHEET record; which index is the current vs. previous strand is not
    // established here — confirm. Declared in the order atom2, atom1.
    pub hbond_atom2: BioAtomAddress,
    pub hbond_atom1: BioAtomAddress,
    // NOTE(review): presumably the PDB strand sense (0 first, 1 parallel, -1 antiparallel) — confirm.
    pub sense: i32,
    pub name: String,
}
668
/// One sheet annotation (`BioStructure::sheets`): a name plus its strands.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioSheet {
    pub name: String,
    pub strands: Vec<BioSheetStrand>,
}
674
/// 3×3 matrix plus 3-vector.
///
/// Note: the derived `Default` is an all-zero matrix and vector, NOT an
/// identity transform.
// NOTE(review): Gemmi's `Transform` defaults to identity; check whether any
// caller relies on `BioTransform::default()` being a usable transform.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct BioTransform {
    /// Matrix part, as three arrays of three values.
    // NOTE(review): row- vs. column-major layout is not established here — confirm.
    pub mat: [[f32; 3]; 3],
    /// Vector part.
    pub vec: [f32; 3],
}
680
/// One NCS operator (`BioStructure::ncs_operators`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioNcsOperator {
    pub id: String,
    // NOTE(review): presumably the PDB MTRIX "given" flag (coordinates for the
    // related copy are already present in the file) — confirm.
    pub given: bool,
    pub transform: BioTransform,
}
687
/// One named transform applied by an assembly generator
/// (`BioAssemblyGenerator::operators`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioAssemblyOperator {
    pub name: String,
    /// Operator type as source text (trailing underscore because `type` is a keyword).
    pub type_: String,
    pub transform: BioTransform,
}
694
/// One generator of an assembly (`BioAssembly::generators`): a set of chain
/// and subchain names together with the operators listed for them.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioAssemblyGenerator {
    pub chains: Vec<String>,
    pub subchains: Vec<String>,
    pub operators: Vec<BioAssemblyOperator>,
}
701
/// Special assembly category stored in `BioAssembly::special_kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioAssemblySpecialKind {
    /// Default: not a special assembly.
    #[default]
    NA,
    CompleteIcosahedral,
    RepresentativeHelical,
    CompletePoint,
}
710
/// One biological assembly description (`BioStructure::assemblies`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioAssembly {
    pub name: String,
    pub author_determined: bool,
    pub software_determined: bool,
    pub special_kind: BioAssemblySpecialKind,
    /// Oligomeric count as recorded; `0` is the derived default.
    pub oligomeric_count: i32,
    pub oligomeric_details: String,
    pub software_name: String,
    // NOTE(review): presumably buried surface area, total surface area, and
    // free-energy gain as in Gemmi's `Assembly` (absa / ssa / more) — confirm.
    pub absa: Option<f64>,
    pub ssa: Option<f64>,
    pub more: Option<f64>,
    pub generators: Vec<BioAssemblyGenerator>,
}
725
/// Unit-cell parameters: three edge lengths and three angles.
// NOTE(review): units are not established in this file (Å and degrees in
// PDB CRYST1 / mmCIF `_cell`) — confirm.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct CrystalCell {
    pub a: f32,
    pub b: f32,
    pub c: f32,
    pub alpha: f32,
    pub beta: f32,
    pub gamma: f32,
}
735
/// Crystallographic information attached to a `BioStructure`
/// (`BioStructure::crystal`).
#[derive(Debug, Clone, PartialEq)]
pub struct CrystalInfo {
    pub cell: CrystalCell,
    /// Space-group symbol as source text, when given.
    // NOTE(review): "hm" presumably means Hermann–Mauguin — confirm.
    pub spacegroup_hm: Option<String>,
    // NOTE(review): presumably the Z value from PDB CRYST1, kept as text — confirm.
    pub z_pdb: Option<String>,
    pub scale: Option<BioTransform>,
    // NOTE(review): presumably the fractionalization and orthogonalization
    // transforms of the cell — confirm.
    pub frac: BioTransform,
    pub orth: BioTransform,
    pub explicit_matrices: bool,
    pub cs_count: i16,
    pub cell_images: Vec<BioTransform>,
}
748
749// ---------------------------------------------------------------------------
750// Coordinate block
751// ---------------------------------------------------------------------------
752
/// Positions of every atom, one `[x, y, z]` triple per `AtomId`.
///
/// Invariant: the owning `BioStructure` keeps exactly one position per atom
/// row, so this block and the atom table have the same length.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CoordinateBlock {
    pub(crate) positions: Vec<[f32; 3]>,
}

impl CoordinateBlock {
    /// Borrows all positions as a slice, in atom-row order.
    #[must_use]
    pub fn positions(&self) -> &[[f32; 3]] {
        self.positions.as_slice()
    }
}
766
/// Coordinate file format.
///
/// Used both as the requested format when reading
/// (`BioStructure::from_str_with_format`) and as the recorded format of a
/// loaded structure (`BioStructure::input_format`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BioCoorFormat {
    Pdb,
    Mmcif,
    Mmjson,
    ChemComp,
    /// Default: format not known.
    #[default]
    Unknown,
    /// Request that the reader detect the format from the input
    /// (passed by `BioStructure::from_structure_str`).
    Detect,
}
777
778// ---------------------------------------------------------------------------
779// Top-level BioStructure
780// ---------------------------------------------------------------------------
781
/// Flat-row biomolecular structure.
///
/// Hierarchy: Model → Chain → Residue → Atom (all stored as flat Vecs).
/// Row ids are snapshot-local indices; PDB/mmCIF source identifiers are stored
/// in the `source` fields of each row.
///
/// Note: the derived `Default` gives `ter_status` the value `'\0'` and `origx`
/// an all-zero `BioTransform`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct BioStructure {
    pub(crate) name: String,
    /// Format the structure was read from.
    pub(crate) input_format: BioCoorFormat,
    // Flat row tables; parents reference children through `RowSpan`s and
    // children reference parents through row ids.
    pub(crate) atoms: Vec<AtomRow>,
    pub(crate) residues: Vec<ResidueRow>,
    pub(crate) chains: Vec<ChainRow>,
    pub(crate) entities: Vec<EntityRow>,
    pub(crate) models: Vec<ModelRow>,
    /// Atom positions, indexed by `AtomId`.
    pub(crate) coordinates: CoordinateBlock,
    pub(crate) metadata: BioMetadata,
    pub(crate) crystal: Option<CrystalInfo>,
    pub(crate) resolution: Option<f64>,
    // NOTE(review): presumably the line number of the first non-ASCII input line — confirm.
    pub(crate) non_ascii_line: Option<usize>,
    pub(crate) raw_remarks: Vec<String>,
    // NOTE(review): one-character status of TER records; the value set is
    // defined by the PDB reader — confirm.
    pub(crate) ter_status: char,
    pub(crate) has_d_fraction: bool,
    pub(crate) mod_residues: Vec<BioModRes>,
    /// Pairs of strings describing shortened CCD codes.
    // NOTE(review): which element is the short vs. the long code is not shown here.
    pub(crate) shortened_ccd_codes: Vec<(String, String)>,
    // NOTE(review): presumably PDB CONECT records keyed by atom serial — confirm.
    pub(crate) conect_map: HashMap<i32, Vec<i32>>,
    pub(crate) deferred_conn_records: Vec<String>,
    pub(crate) connections: Vec<BioConnection>,
    pub(crate) cispeps: Vec<BioCisPep>,
    pub(crate) helices: Vec<BioHelix>,
    pub(crate) sheets: Vec<BioSheet>,
    pub(crate) remark_290_operators: Vec<String>,
    pub(crate) assemblies: Vec<BioAssembly>,
    /// Whether `origx` was provided; `origx` itself is always stored.
    pub(crate) has_origx: bool,
    pub(crate) origx: BioTransform,
    pub(crate) ncs_operators: Vec<BioNcsOperator>,
    pub(crate) ncs_oper_identity_id: Option<String>,
}
819
820impl BioStructure {
821    #[must_use]
822    pub fn new() -> Self {
823        Self::default()
824    }
825
826    /// Reads a Gemmi-aligned PDB structural record stream into a `BioStructure`.
827    pub fn from_pdb_str(text: &str) -> Result<Self, crate::io::bio::BioReadError> {
828        Self::from_pdb_str_with_params(text, crate::io::bio::BioPdbReadParams::default())
829    }
830
831    /// Reads a Gemmi-aligned PDB structural record stream with explicit PDB reader parameters.
832    pub fn from_pdb_str_with_params(
833        text: &str,
834        params: crate::io::bio::BioPdbReadParams,
835    ) -> Result<Self, crate::io::bio::BioReadError> {
836        crate::io::bio::read_pdb_bio_structure_from_str_with_params(text, params)
837    }
838
839    /// Reads a Gemmi-aligned mmCIF structural document into a `BioStructure`.
840    pub fn from_mmcif_str(text: &str, path: &str) -> Result<Self, crate::io::bio::BioReadError> {
841        Self::from_str_with_format(text, path, BioCoorFormat::Mmcif)
842    }
843
844    /// Reads a Gemmi-aligned structure by detecting the input format from text.
845    pub fn from_structure_str(
846        text: &str,
847        path: &str,
848    ) -> Result<Self, crate::io::bio::BioReadError> {
849        Self::from_str_with_format(text, path, BioCoorFormat::Detect)
850    }
851
852    /// Reads a Gemmi-aligned structure using the requested coordinate format.
853    pub fn from_str_with_format(
854        text: &str,
855        path: &str,
856        format: BioCoorFormat,
857    ) -> Result<Self, crate::io::bio::BioReadError> {
858        crate::io::bio::read_structure_from_memory(text, path, format)
859    }
860
861    #[must_use]
862    pub fn num_atoms(&self) -> usize {
863        self.atoms.len()
864    }
865
866    #[must_use]
867    pub fn num_residues(&self) -> usize {
868        self.residues.len()
869    }
870
871    #[must_use]
872    pub fn num_chains(&self) -> usize {
873        self.chains.len()
874    }
875
876    #[must_use]
877    pub fn num_models(&self) -> usize {
878        self.models.len()
879    }
880
881    #[must_use]
882    pub fn num_entities(&self) -> usize {
883        self.entities.len()
884    }
885
886    #[must_use]
887    pub fn name(&self) -> &str {
888        &self.name
889    }
890
891    #[must_use]
892    pub fn input_format(&self) -> BioCoorFormat {
893        self.input_format
894    }
895
896    #[must_use]
897    pub fn atoms(&self) -> &[AtomRow] {
898        &self.atoms
899    }
900
901    #[must_use]
902    pub fn residues(&self) -> &[ResidueRow] {
903        &self.residues
904    }
905
906    #[must_use]
907    pub fn chains(&self) -> &[ChainRow] {
908        &self.chains
909    }
910
911    #[must_use]
912    pub fn models(&self) -> &[ModelRow] {
913        &self.models
914    }
915
916    #[must_use]
917    pub fn entities(&self) -> &[EntityRow] {
918        &self.entities
919    }
920
921    #[must_use]
922    pub fn metadata(&self) -> &BioMetadata {
923        &self.metadata
924    }
925
926    #[must_use]
927    pub fn crystal(&self) -> Option<&CrystalInfo> {
928        self.crystal.as_ref()
929    }
930
931    #[must_use]
932    pub fn resolution(&self) -> Option<f64> {
933        self.resolution
934    }
935
936    #[must_use]
937    pub fn ter_status(&self) -> char {
938        self.ter_status
939    }
940
941    #[must_use]
942    pub fn connections(&self) -> &[BioConnection] {
943        &self.connections
944    }
945
946    #[must_use]
947    pub fn cispeps(&self) -> &[BioCisPep] {
948        &self.cispeps
949    }
950
951    #[must_use]
952    pub fn mod_residues(&self) -> &[BioModRes] {
953        &self.mod_residues
954    }
955
956    #[must_use]
957    pub fn assemblies(&self) -> &[BioAssembly] {
958        &self.assemblies
959    }
960
961    #[must_use]
962    pub fn has_origx(&self) -> bool {
963        self.has_origx
964    }
965
966    #[must_use]
967    pub fn origx(&self) -> &BioTransform {
968        &self.origx
969    }
970
971    #[must_use]
972    pub fn ncs_operators(&self) -> &[BioNcsOperator] {
973        &self.ncs_operators
974    }
975
976    #[must_use]
977    pub fn ncs_oper_identity_id(&self) -> Option<&str> {
978        self.ncs_oper_identity_id.as_deref()
979    }
980
981    #[must_use]
982    pub fn coordinates(&self) -> &CoordinateBlock {
983        &self.coordinates
984    }
985
986    #[must_use]
987    pub fn atom_position(&self, atom: AtomId) -> Option<[f32; 3]> {
988        self.coordinates
989            .positions
990            .get(atom.index() as usize)
991            .copied()
992    }
993
994    #[must_use]
995    pub fn residue_atoms(&self, residue: ResidueId) -> Option<&[AtomRow]> {
996        let row = self.residues.get(residue.index() as usize)?;
997        let start = row.atom_span.start as usize;
998        let end = row.atom_span.end() as usize;
999        self.atoms.get(start..end)
1000    }
1001}