1use atom::{Atom, Bond, Molecule, ToMolecule};
2use std::{
3 ffi::OsStr,
4 fs::File,
5 io::{self, BufReader, Read},
6 path::Path,
7 vec,
8};
9
10pub mod atom;
11pub mod cif;
12pub mod mol;
13pub mod mol2;
14pub mod pdb;
15pub mod xyz;
16
/// Reads a structure file and converts its contents into a [`Molecule`].
///
/// The file is parsed with [`from_file`]; the resulting atom and bond lists are then converted
/// through [`ToMolecule::to_molecule`].
///
/// This function is only compiled when the `petgraph` feature is enabled.
///
/// # Errors
///
/// Returns whatever [`io::Error`] [`from_file`] reports.
#[cfg(feature = "petgraph")]
pub fn molecule_from_file(filename: impl AsRef<Path>) -> io::Result<Molecule> {
    let atoms_and_bonds = from_file(filename)?;
    Ok(atoms_and_bonds.to_molecule())
}
30
31pub fn from_file(filename: impl AsRef<Path>) -> io::Result<(Vec<Atom>, Vec<Bond>)> {
41 let file = File::open(&filename)?;
42 let reader = BufReader::new(file);
43
44 match filename.as_ref().extension().and_then(OsStr::to_str) {
45 Some("cif") => parse(reader, FileType::CIF),
46 Some("mol") => parse(reader, FileType::MOL),
47 Some("mol2") => parse(reader, FileType::MOL2),
48 Some("pdb") => parse(reader, FileType::PDB),
49 Some("xyz") => parse(reader, FileType::XYZ),
50 _ => Err(io::Error::new(
51 io::ErrorKind::InvalidInput,
52 "Unsupported file extension",
53 )),
54 }
55}
56
/// File formats that [`parse`] can dispatch on.
///
/// [`from_file`] selects one of these variants from the file extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Parsed by the `cif` module; chosen for the `cif` extension.
    CIF,
    /// Parsed by the `mol` module; chosen for the `mol` extension.
    MOL,
    /// Parsed by the `mol2` module; chosen for the `mol2` extension.
    MOL2,
    /// Parsed by the `pdb` module; chosen for the `pdb` extension.
    PDB,
    /// Parsed by the `xyz` module; chosen for the `xyz` extension.
    XYZ,
}
65
66pub fn parse<P: Read>(reader: BufReader<P>, type_: FileType) -> io::Result<(Vec<Atom>, Vec<Bond>)> {
82 match type_ {
83 FileType::CIF => cif::parse(reader),
84 FileType::MOL => mol::parse(reader),
85 FileType::MOL2 => mol2::parse(reader),
86 FileType::PDB => Ok((pdb::parse(reader)?, vec![])),
87 FileType::XYZ => Ok((xyz::parse(reader)?, vec![])),
88 }
89}
90
/// Returns `symbol` with its first character upper-cased and the remainder lower-cased.
///
/// For example `"FE"` and `"fe"` both become `"Fe"`. An empty input yields an empty string.
///
/// The split between "first character" and "remainder" is made on a character boundary, so a
/// multi-byte first character (e.g. `'é'`) is handled without panicking.
fn normalize_symbol(symbol: &str) -> String {
    let mut chars = symbol.chars();
    match chars.next() {
        Some(first) => {
            let mut normalized = String::with_capacity(symbol.len());
            // `to_uppercase` may expand to several characters, hence `extend`.
            normalized.extend(first.to_uppercase());
            // `as_str` is the part of `symbol` after the first character, whatever its byte width.
            normalized.push_str(&chars.as_str().to_lowercase());
            normalized
        }
        None => String::new(),
    }
}
99
// `molecule_from_file` only exists when the `petgraph` feature is enabled, so the test module is
// gated on that feature too; otherwise building the tests without it fails to compile.
#[cfg(all(test, feature = "petgraph"))]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Loads each fixture, drops atoms whose `disorder_group` is 2 and checks that the remaining
    /// node and edge counts match the expected values for that file.
    #[rstest]
    #[case("data/4n4n.cif", 15450, 14968)]
    #[case("data/4r21.cif", 6752, 6902)]
    #[case("data/147288.cif", 206, 230)]
    #[case("data/1484829.cif", 466, 528)]
    #[case("data/cif_noTrim.cif", 79, 89)]
    #[case("data/cif.cif", 79, 89)]
    #[case("data/CuHETMP.cif", 85, 92)]
    #[case("data/ligand.cif", 44, 46)]
    #[case("data/mmcif.cif", 1291, 1256)]
    #[case("data/benzene_3d.mol", 12, 12)]
    #[case("data/benzene_arom.mol", 12, 12)]
    #[case("data/benzene.mol", 6, 6)]
    #[case("data/tep.mol", 46, 50)]
    #[case("data/corrole.mol", 37, 41)]
    #[case("data/0001.mol2", 15450, 14898)]
    #[case("data/benzene.mol2", 12, 12)]
    #[case("data/myo.mol2", 1437, 1312)]
    #[case("data/ptcor.mol2", 129, 127)]
    #[case("data/tep.mol2", 46, 50)]
    #[case("data/VATTOC.mol2", 130, 146)]
    #[case("data/oriluy.pdb", 130, 151)]
    #[case("data/2spl.pdb", 1437, 1314)]
    #[case("data/1hv4.pdb", 9288, 9562)]
    #[case("data/0001.pdb", 15450, 14968)]
    #[case("data/cif.xyz", 102, 155)]
    #[case("data/mescho.xyz", 23, 23)]
    #[case("data/porphyrin.xyz", 37, 44)]
    fn test_molecule(
        #[case] filename: &str,
        #[case] atoms_count: usize,
        #[case] bonds_count: usize,
    ) {
        let mol = molecule_from_file(filename).unwrap();
        // Keep every atom except those in disorder group 2; every bond is offered for keeping.
        let sub = mol.filter_map(
            |_, a| {
                if a.disorder_group != 2 { Some(a) } else { None }
            },
            |_, b| Some(b),
        );

        assert_eq!(sub.node_count(), atoms_count);
        assert_eq!(sub.edge_count(), bonds_count);
    }
}