bio_files/
lib.rs

1#![allow(confusable_idents)]
2#![allow(mixed_script_confusables)]
3
//! The `generic` label in the names in this module differentiates them from ones used in more
//! specific applications.
6
7pub mod mmcif;
8pub mod mol2;
9pub mod pdbqt;
10pub mod sdf;
11
12pub mod ab1;
13pub mod map;
14
15pub mod dat;
16pub mod frcmod;
17pub mod md_params;
18
19mod mmcif_aux;
20
21use std::{
22    fmt,
23    fmt::{Display, Formatter},
24    io,
25    io::ErrorKind,
26    str::FromStr,
27};
28
29pub use ab1::*;
30use lin_alg::f64::Vec3;
31pub use map::*;
32pub use mmcif::*;
33pub use mol2::*;
34use na_seq::{AminoAcid, AtomTypeInRes, Element};
35pub use pdbqt::Pdbqt;
36pub use sdf::*;
37
38#[derive(Clone, Debug, Default)]
39pub struct AtomGeneric {
40    pub serial_number: u32,
41    pub posit: Vec3,
42    pub element: Element,
43    /// e.g. "CG1", "CA", "O", "C", "HA", "CD", "C9" etc.
44    pub type_in_res: Option<AtomTypeInRes>,
45    /// E.g. "c6", "ca", "n3", "ha", "h0" etc, as seen in Mol2 files from AMBER.
46    /// e.g.: "ha": hydrogen attached to an aromatic carbon.
47    /// "ho": hydrogen on a hydroxyl oxygen
48    /// "n3": sp³ nitrogen with three substitutes
49    /// "c6": sp² carbon in a pure six-membered aromatic ring (new in GAFF2; lets GAFF distinguish
50    /// a benzene carbon from other aromatic caca carbons)
51    /// For proteins, this appears to be the same as for `name`.
52    pub force_field_type: Option<String>,
53    /// An atom-centered electric charge, used in molecular dynamics simulations.
54    /// These are sometimes loaded from Mol2 or SDF files, and sometimes added after.
55    pub partial_charge: Option<f32>,
56    /// Indicates, in proteins, that the atom isn't part of an amino acid. E.g., water or
57    /// ligands.
58    pub hetero: bool,
59    pub occupancy: Option<f32>,
60}
61
62impl Display for AtomGeneric {
63    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
64        let ff_type = match &self.force_field_type {
65            Some(f) => f,
66            None => "None",
67        };
68
69        let q = match &self.partial_charge {
70            Some(q_) => format!("{q_:.3}"),
71            None => "None".to_string(),
72        };
73
74        write!(
75            f,
76            "Atom {}: {}, {}. {:?}, ff: {ff_type}, q: {q}",
77            self.serial_number,
78            self.element.to_letter(),
79            self.posit,
80            self.type_in_res,
81        )?;
82
83        if self.hetero {
84            write!(f, ", Het")?;
85        }
86
87        Ok(())
88    }
89}
90
/// The kind of bond between two atoms. These are the Mol2 standard types, unless otherwise
/// noted.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BondType {
    Single,
    Double,
    Triple,
    Aromatic,
    Amide,
    Dummy,
    Unknown,
    NotConnected,
    /// mmCIF, rare
    Quadruple,
    /// mmCIF. Distinct from aromatic; doesn't need to be a classic ring.
    Delocalized,
    /// mmCIF; mostly for macromolecular components
    PolymericLink,
}

impl Display for BondType {
    /// Writes the short text token for this bond type: `1`, `2`, `3`, `ar`, `am`, `du`, `un`,
    /// `nc` for the Mol2 types, and `quad`, `delo`, `poly` for the mmCIF-only ones.
    ///
    /// Every token written here is accepted by the `FromStr` impl, so formatting and parsing
    /// round-trip. Formatter flags are honoured: e.g. `{:<4}` pads the token to 4 columns, and
    /// a precision truncates it, exactly as for a `&str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let token = match self {
            Self::Single => "1",
            Self::Double => "2",
            Self::Triple => "3",
            Self::Aromatic => "ar",
            Self::Amide => "am",
            Self::Dummy => "du",
            Self::Unknown => "un",
            Self::NotConnected => "nc",
            Self::Quadruple => "quad",
            Self::Delocalized => "delo",
            Self::PolymericLink => "poly",
        };

        // `pad` (rather than `write!`) so that width/fill/alignment from the caller's format
        // string are applied instead of being silently dropped.
        f.pad(token)
    }
}

impl BondType {
    /// SDF format uses a truncated set, and does things like mark every other
    /// aromatic bond as double.
    ///
    /// Returns `"1"`, `"2"`, or `"3"` for single, double, and triple bonds; every other bond
    /// type is written as a single bond (`"1"`).
    pub fn to_str_sdf(&self) -> String {
        let sdf_type = if matches!(self, Self::Single | Self::Double | Self::Triple) {
            *self
        } else {
            Self::Single
        };

        sdf_type.to_string()
    }
}

impl FromStr for BondType {
    type Err = io::Error;

    /// Can ingest from mol2, SDF, and mmCIF formats.
    ///
    /// Surrounding whitespace is ignored, and matching is case-insensitive.
    ///
    /// # Errors
    /// Returns an `io::Error` of kind `InvalidData` if the token is not recognized. The
    /// message contains the input exactly as passed in (untrimmed).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // All accepted tokens are ASCII, so an ASCII case-insensitive comparison recognizes the
        // same inputs as lowercasing the whole string first, without allocating.
        const TOKENS: &[(&str, BondType)] = &[
            ("1", BondType::Single),
            ("sing", BondType::Single),
            ("2", BondType::Double),
            ("doub", BondType::Double),
            ("3", BondType::Triple),
            ("trip", BondType::Triple),
            ("4", BondType::Aromatic),
            ("ar", BondType::Aromatic),
            ("arom", BondType::Aromatic),
            ("am", BondType::Amide),
            ("du", BondType::Dummy),
            ("un", BondType::Unknown),
            ("nc", BondType::NotConnected),
            ("quad", BondType::Quadruple),
            ("delo", BondType::Delocalized),
            ("poly", BondType::PolymericLink),
        ];

        let token = s.trim();

        TOKENS
            .iter()
            .find(|(text, _)| text.eq_ignore_ascii_case(token))
            .map(|&(_, bond_type)| bond_type)
            .ok_or_else(|| {
                io::Error::new(ErrorKind::InvalidData, format!("Invalid BondType: {s}"))
            })
    }
}
168
169#[derive(Clone, Debug)]
170pub struct BondGeneric {
171    pub bond_type: BondType,
172    pub atom_0_sn: u32,
173    pub atom_1_sn: u32,
174}
175
176#[derive(Debug, Clone, PartialEq)]
177pub enum ResidueType {
178    AminoAcid(AminoAcid),
179    Water,
180    Other(String),
181}
182
183impl Display for ResidueType {
184    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
185        let name = match &self {
186            ResidueType::Other(n) => n.clone(),
187            ResidueType::Water => "Water".to_string(),
188            ResidueType::AminoAcid(aa) => aa.to_string(),
189        };
190
191        write!(f, "{name}")
192    }
193}
194
195impl Default for ResidueType {
196    fn default() -> Self {
197        Self::Other(String::new())
198    }
199}
200
201impl ResidueType {
202    /// Parses from the "name" field in common text-based formats lik CIF, PDB, and PDBQT.
203    pub fn from_str(name: &str) -> Self {
204        if name.to_uppercase() == "HOH" {
205            ResidueType::Water
206        } else {
207            match AminoAcid::from_str(name) {
208                Ok(aa) => ResidueType::AminoAcid(aa),
209                Err(_) => ResidueType::Other(name.to_owned()),
210            }
211        }
212    }
213}
214
215#[derive(Debug, Clone)]
216pub struct ResidueGeneric {
217    /// We use serial number of display, search etc, and array index to select. Residue serial number is not
218    /// unique in the molecule; only in the chain.
219    pub serial_number: u32,
220    pub res_type: ResidueType,
221    /// Serial number
222    pub atom_sns: Vec<u32>,
223    pub end: ResidueEnd,
224}
225
/// Where a residue sits relative to the ends of a protein/polypeptide.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResidueEnd {
    /// Presumably: within the polypeptide, at neither terminus.
    Internal,
    /// At the N-terminus.
    NTerminus,
    /// At the C-terminus.
    CTerminus,
    /// Not part of a protein/polypeptide.
    Hetero,
}
234
/// A chain: an identifier, plus the serial numbers of the residues and atoms that belong to it.
#[derive(Clone, Debug)]
pub struct ChainGeneric {
    /// Identifier of the chain.
    pub id: String,
    // todo: Do we want both residues and atoms stored here? It's an overconstraint.
    /// Serial numbers of the residues in this chain.
    pub residue_sns: Vec<u32>,
    /// Serial numbers of the atoms in this chain.
    pub atom_sns: Vec<u32>,
}
244
/// The kind of secondary structure a stretch of backbone forms.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SecondaryStructure {
    Helix,
    Sheet,
    Coil,
}
251
252#[derive(Clone, Debug)]
253/// See note elsewhere regarding serial numbers vs indices: In your downstream applications, you may
254/// wish to convert sns to indices, for faster operations.
255pub struct BackboneSS {
256    /// Atom serial numbers.
257    pub start_sn: u32,
258    pub end_sn: u32,
259    pub sec_struct: SecondaryStructure,
260}
261
/// The method used to find a given molecular structure. This data is present in mmCIF files
/// as the `_exptl.method` field.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum ExperimentalMethod {
    XRayDiffraction,
    ElectronDiffraction,
    NeutronDiffraction,
    /// i.e. Cryo-EM
    ElectronMicroscopy,
    SolutionNmr,
}

impl ExperimentalMethod {
    /// A short label for the method, e.g. for displaying in the space-constrained UI.
    pub fn to_str_short(&self) -> String {
        let label = match self {
            Self::XRayDiffraction => "X-ray",
            Self::NeutronDiffraction => "ND",
            Self::ElectronDiffraction => "ED",
            Self::ElectronMicroscopy => "EM",
            Self::SolutionNmr => "NMR",
        };

        String::from(label)
    }
}

impl Display for ExperimentalMethod {
    /// Writes the full, human-readable name of the method, e.g. `X-Ray diffraction`.
    ///
    /// The text written here is accepted by the `FromStr` impl (which is case-insensitive), so
    /// formatting and parsing round-trip. Formatter flags such as width and alignment are
    /// honoured, as they would be for a `&str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let full_name = match self {
            Self::XRayDiffraction => "X-Ray diffraction",
            Self::NeutronDiffraction => "Neutron diffraction",
            Self::ElectronDiffraction => "Electron diffraction",
            Self::ElectronMicroscopy => "Electron microscopy",
            Self::SolutionNmr => "Solution NMR",
        };

        // `pad` (rather than `write!`) so that width/fill/alignment from the caller's format
        // string are applied instead of being silently dropped.
        f.pad(full_name)
    }
}

impl FromStr for ExperimentalMethod {
    type Err = io::Error;

    /// Parse an mmCIF-style method string into an ExperimentalMethod.
    ///
    /// Matching is case-insensitive and ignores surrounding whitespace.
    ///
    /// # Errors
    /// Returns an `io::Error` of kind `InvalidData` for any other text. The message contains
    /// the lowercased, trimmed input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let lowered = s.to_lowercase();

        match lowered.trim() {
            "x-ray diffraction" => Ok(Self::XRayDiffraction),
            "neutron diffraction" => Ok(Self::NeutronDiffraction),
            "electron diffraction" => Ok(Self::ElectronDiffraction),
            "electron microscopy" => Ok(Self::ElectronMicroscopy),
            "solution nmr" => Ok(Self::SolutionNmr),
            other => Err(io::Error::new(
                ErrorKind::InvalidData,
                format!("Error parsing experimental method: {other}"),
            )),
        }
    }
}