chelate/
lib.rs

1use atom::{Atom, Bond, Molecule, ToMolecule};
2use std::{
3    ffi::OsStr,
4    fs::File,
5    io::{self, BufReader, Read},
6    path::Path,
7    vec,
8};
9
10pub mod atom;
11pub mod cif;
12pub mod mol;
13pub mod mol2;
14pub mod pdb;
15pub mod xyz;
16
/// Reads the file at `filename` with [`from_file`] and converts the parsed
/// atoms and bonds into a [`Molecule`].
///
/// This function is only compiled when the `petgraph` feature is enabled.
///
/// # Errors
/// Propagates any error returned by [`from_file`].
///
/// # Examples
/// ```
/// use chelate;
/// let mol = chelate::molecule_from_file("data/oriluy.pdb").unwrap();
///
/// assert_eq!(mol.node_count(), 130);
/// assert_eq!(mol.edge_count(), 151);
/// ```
#[cfg(feature = "petgraph")]
pub fn molecule_from_file(filename: impl AsRef<Path>) -> io::Result<Molecule> {
    from_file(filename).map(|parsed| parsed.to_molecule())
}
30
31/// Parses a file based on the FileType and returns a vector of `Atom` and a vector of `Bond` objects.
32/// # Examples
33/// ```
34/// use chelate;
35/// let (atoms, bonds) = chelate::from_file("data/147288.cif").unwrap();
36///
37/// assert_eq!(atoms.len(), 206);
38/// assert_eq!(bonds.len(), 230);
39/// ```
40pub fn from_file(filename: impl AsRef<Path>) -> io::Result<(Vec<Atom>, Vec<Bond>)> {
41    let file = File::open(&filename)?;
42    let reader = BufReader::new(file);
43
44    match filename.as_ref().extension().and_then(OsStr::to_str) {
45        Some("cif") => parse(reader, FileType::CIF),
46        Some("mol") => parse(reader, FileType::MOL),
47        Some("mol2") => parse(reader, FileType::MOL2),
48        Some("pdb") => parse(reader, FileType::PDB),
49        Some("xyz") => parse(reader, FileType::XYZ),
50        _ => Err(io::Error::new(
51            io::ErrorKind::InvalidInput,
52            "Unsupported file extension",
53        )),
54    }
55}
56
/// Enum to declare one of the supported chemical filetypes.
///
/// Each variant corresponds to the file extension of the same name in
/// lower case (`cif`, `mol`, `mol2`, `pdb`, `xyz`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Files with the `cif` extension.
    CIF,
    /// Files with the `mol` extension.
    MOL,
    /// Files with the `mol2` extension.
    MOL2,
    /// Files with the `pdb` extension.
    PDB,
    /// Files with the `xyz` extension.
    XYZ,
}
65
66/// Parses a file based on the FileType and returns a vector of `Atom` and a vector of `Bond` objects.
67/// # Examples
68/// ```
69/// use chelate;
70/// use chelate::FileType;
71/// use std::fs::File;
72/// use std::io::BufReader;
73///
74/// let file = File::open("data/147288.cif").unwrap();
75/// let reader = BufReader::new(file);
76/// let (atoms, bonds) = chelate::parse(reader, FileType::CIF).unwrap();
77///
78/// assert_eq!(atoms.len(), 206);
79/// assert_eq!(bonds.len(), 230);
80/// ```
81pub fn parse<P: Read>(reader: BufReader<P>, type_: FileType) -> io::Result<(Vec<Atom>, Vec<Bond>)> {
82    match type_ {
83        FileType::CIF => cif::parse(reader),
84        FileType::MOL => mol::parse(reader),
85        FileType::MOL2 => mol2::parse(reader),
86        FileType::PDB => Ok((pdb::parse(reader)?, vec![])),
87        FileType::XYZ => Ok((xyz::parse(reader)?, vec![])),
88    }
89}
90
/// Returns `symbol` with its first character upper-cased and every
/// following character lower-cased (e.g. `"FE"` becomes `"Fe"`).
///
/// An empty input yields an empty string.
fn normalize_symbol(symbol: &str) -> String {
    let mut chars = symbol.chars();
    match chars.next() {
        Some(first) => {
            let mut normalized = String::with_capacity(symbol.len());
            normalized.extend(first.to_uppercase());
            // `chars.as_str()` is the remainder after the first *character*.
            // Slicing at byte offset 1 would panic when the first character
            // is encoded with more than one byte.
            normalized.push_str(&chars.as_str().to_lowercase());
            normalized
        }
        None => String::new(),
    }
}
99
// `molecule_from_file` only exists when the `petgraph` feature is enabled, so
// this module is gated on the same feature; otherwise a test build without
// that feature would fail to compile.
#[cfg(all(test, feature = "petgraph"))]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Loads each data file as a molecule and checks the number of atoms
    /// (nodes) and bonds (edges) that remain after dropping every atom whose
    /// `disorder_group` is 2.
    #[rstest]
    #[case("data/4n4n.cif", 15450, 14968)]
    #[case("data/4r21.cif", 6752, 6902)]
    #[case("data/147288.cif", 206, 230)]
    #[case("data/1484829.cif", 466, 528)]
    #[case("data/cif_noTrim.cif", 79, 89)]
    #[case("data/cif.cif", 79, 89)]
    #[case("data/CuHETMP.cif", 85, 92)]
    #[case("data/ligand.cif", 44, 46)]
    #[case("data/mmcif.cif", 1291, 1256)]
    #[case("data/benzene_3d.mol", 12, 12)]
    #[case("data/benzene_arom.mol", 12, 12)]
    #[case("data/benzene.mol", 6, 6)]
    #[case("data/tep.mol", 46, 50)]
    #[case("data/corrole.mol", 37, 41)]
    #[case("data/0001.mol2", 15450, 14898)]
    #[case("data/benzene.mol2", 12, 12)]
    #[case("data/myo.mol2", 1437, 1312)]
    #[case("data/ptcor.mol2", 129, 127)]
    #[case("data/tep.mol2", 46, 50)]
    #[case("data/VATTOC.mol2", 130, 146)]
    #[case("data/oriluy.pdb", 130, 151)]
    #[case("data/2spl.pdb", 1437, 1314)]
    #[case("data/1hv4.pdb", 9288, 9562)]
    #[case("data/0001.pdb", 15450, 14968)]
    #[case("data/cif.xyz", 102, 155)]
    #[case("data/mescho.xyz", 23, 23)]
    #[case("data/porphyrin.xyz", 37, 44)]
    fn test_molecule(
        #[case] filename: &str,
        #[case] atoms_count: usize,
        #[case] bonds_count: usize,
    ) {
        let mol = molecule_from_file(filename).unwrap();
        // Filter disorder the same way as in cif.rs: keep every bond and
        // every atom that is not in disorder group 2.
        let sub = mol.filter_map(
            |_, a| {
                if a.disorder_group != 2 { Some(a) } else { None }
            },
            |_, b| Some(b),
        );

        assert_eq!(sub.node_count(), atoms_count);
        assert_eq!(sub.edge_count(), bonds_count);
    }
}