lib3dmol/parser/
read_pdb.rs

1use std::fs::File;
2use std::io::prelude::*;
3use std::io::BufReader;
4use std::process;
5
6use crate::structures::atom::AtomType;
7use crate::structures::structure::Structure;
8
/// Convert a text field into an `f32`.
///
/// Surrounding whitespace (spaces, `\n`, ...) is stripped with `trim`
/// before the conversion is attempted.
///
/// # Errors
/// If the trimmed text is not a valid float, the parse error is printed on
/// stdout and `0.0` is returned.
///
fn parse_float(s: &str) -> f32 {
    let field = s.trim();
    field.parse::<f32>().unwrap_or_else(|err| {
        println!("{}", err);
        0.0
    })
}
24
/// Convert a text field into an `i64`.
///
/// Surrounding whitespace (spaces, `\n`, ...) is stripped with `trim` first.
/// The field is read as a decimal number; if that fails it is read again as
/// hexadecimal with `from_str_radix`, because in large PDB files written by
/// VMD the atom numbers above 99,999 are stored in hexadecimal.
///
/// # Errors
/// If the field is neither a decimal nor a hexadecimal integer, the error of
/// the decimal conversion is printed on stdout and `0` is returned.
///
fn parse_int(s: &str) -> i64 {
    let field = s.trim();
    field
        .parse::<i64>()
        // Keep the decimal error: it is the one reported when both attempts fail.
        .or_else(|decimal_err| i64::from_str_radix(field, 16).map_err(|_| decimal_err))
        .unwrap_or_else(|decimal_err| {
            println!("{}", decimal_err);
            0
        })
}
46
/// Keep a character only when it is an ASCII letter or an ASCII digit.
///
/// Returns `Some(s)` for an ASCII alphanumeric character and `None` for
/// anything else (blank, punctuation, non-ASCII, ...).
///
fn parse_char(s: char) -> Option<char> {
    if s.is_ascii_alphanumeric() {
        Some(s)
    } else {
        None
    }
}
56
57/// Parse the pdb file and return a [`Structure`]
58///
59/// # Examples
60/// ```
61/// use lib3dmol::parser;
62/// let my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
63/// assert_eq!(66, my_struct.get_residue_number());
64/// ```
65pub fn read_pdb(pdb: &str, name: &str) -> Structure {
66    // Check if the file exist and/or can be read
67    let pdb = match File::open(pdb) {
68        Ok(f) => f,
69        Err(e) => {
70            eprintln!("Could not open the file \"{}\"\nError: {}", pdb, e);
71            process::exit(1);
72        }
73    };
74
75    let reader = BufReader::new(pdb);
76    let mut structure = Structure::new(name.to_string());
77
78    for line in reader.lines() {
79        let l = line.unwrap();
80        update_from_line(&mut structure, &l);
81    }
82    structure
83}
84
85/// Read the text in the [`String`] or [`&str`] in the PDB format and return a Structure
86///
87/// # Examples
88/// ```
89/// use lib3dmol::parser;
90/// let res = "ATOM      1  N   ALA     2      -0.677  -1.230  -0.491  1.00  0.00           N
91/// ATOM      2  CA  ALA     2      -0.001   0.064  -0.491  1.00  0.00           C
92/// ATOM      3  C   ALA     2       1.499  -0.110  -0.491  1.00  0.00           C
93/// ATOM      4  O   ALA     2       2.030  -1.227  -0.502  1.00  0.00           O
94/// ATOM      5  CB  ALA     2      -0.509   0.856   0.727  1.00  0.00           C
95/// ATOM      6  H   ALA     2      -0.131  -2.162  -0.491  1.00  0.00           H
96/// ATOM      7  HA  ALA     2      -0.269   0.603  -1.418  1.00  0.00           H
97/// ATOM      8 1HB  ALA     2      -1.605   1.006   0.691  1.00  0.00           H
98/// ATOM      9 2HB  ALA     2      -0.285   0.342   1.681  1.00  0.00           H
99/// ATOM     10 3HB  ALA     2      -0.053   1.861   0.784  1.00  0.00           H
100/// TER
101/// END";
102///
103/// let my_alanine = parser::read_pdb_txt(res, "alanine");
104///
105/// assert_eq!(my_alanine.get_atom_number(), 10);
106/// ```
107pub fn read_pdb_txt(text: &str, name: &str) -> Structure {
108    let mut structure = Structure::new(name.to_string());
109    for line in text.lines() {
110        update_from_line(&mut structure, &line);
111    }
112    structure
113}
114
115fn update_from_line(structure: &mut Structure, l: &str) {
116    if l.starts_with("HETAM") || l.starts_with("ATOM") {
117        // Set the 4 variables to their values per default
118        // Edit them after if they are presents in the file
119        let mut occupancy = None;
120        let mut temp_factor = None;
121        let mut element = None;
122        let mut charge = None;
123
124        // First get the resname.
125        // If the "residue" is a amino acid, continue to parse the line and add informations to the protein
126        // else continue to the next one line
127        let residue_name = &l[17..20].trim();
128        let atom_name = &l[12..16];
129        let atom_type = parse_atom(&atom_name);
130        let chain = l[21..22].chars().next().unwrap();
131        let atom_number = parse_int(&l[6..11]);
132        let residue_number = parse_int(&l[22..26]);
133        let res_icode = parse_char(l.chars().nth(26).unwrap());
134        let x = parse_float(&l[30..38]);
135        let y = parse_float(&l[38..46]);
136        let z = parse_float(&l[46..54]);
137
138        // Attribute values according to the length of the line
139        match l.len() {
140            x if x > 60 => occupancy = Some(parse_float(&l[54..60])),
141            x if x > 66 => temp_factor = Some(parse_float(&l[60..66])),
142            x if x > 78 => element = Some(l[76..78].to_string()),
143            x if x > 80 => charge = Some(l[78..80].to_string()),
144            _ => (), // Not reachable
145        };
146        // Add informations to the Structure
147        structure.update_structure(
148            chain,
149            residue_name.to_string(),
150            residue_number as u64,
151            res_icode,
152            atom_name.to_string(),
153            atom_number as u64,
154            atom_type,
155            [x, y, z],
156            occupancy,
157            temp_factor,
158            element,
159            charge,
160        );
161    }
162}
163
164/// Parse a 4 char string lenght corresponding  at the column 12 to 16 in the PDB format
165/// These columns contains informations on the Atom type
166/// The symbol of the atom is in the 2 first char exepting for Hydrogen atoms
167fn parse_atom(txt: &str) -> AtomType {
168    let symbol = &txt[0..2];
169
170    match symbol {
171        " H" => AtomType::Hydrogen,
172        "LI" => AtomType::Lithium,
173        "BE" => AtomType::Beryllium,
174        " B" => AtomType::Boron,
175        " C" => AtomType::Carbon,
176        " N" => AtomType::Nitrogen,
177        " O" => AtomType::Oxygen,
178        " F" => AtomType::Fluorine,
179        "NE" => AtomType::Neon,
180        "NA" => AtomType::Sodium,
181        "MG" => AtomType::Magnesium,
182        "AL" => AtomType::Aluminum,
183        "SI" => AtomType::Silicon,
184        " P" => AtomType::Phosphorus,
185        " S" => AtomType::Sulfur,
186        "CL" => AtomType::Chlorine,
187        "AR" => AtomType::Argon,
188        " K" => AtomType::Potassium,
189        "CA" => AtomType::Calcium,
190        "SC" => AtomType::Scandium,
191        "TI" => AtomType::Titanium,
192        " V" => AtomType::Vanadium,
193        "CR" => AtomType::Chromium,
194        "MN" => AtomType::Manganese,
195        "FE" => AtomType::Iron,
196        "CO" => AtomType::Cobalt,
197        "NI" => AtomType::Nickel,
198        "CU" => AtomType::Copper,
199        "ZN" => AtomType::Zinc,
200        "GA" => AtomType::Gallium,
201        "GE" => AtomType::Germanium,
202        "AS" => AtomType::Arsenic,
203        "SE" => AtomType::Selenium,
204        "BR" => AtomType::Bromine,
205        "KR" => AtomType::Krypton,
206        "RB" => AtomType::Rubidium,
207        "SR" => AtomType::Strontium,
208        " Y" => AtomType::Yttrium,
209        "ZR" => AtomType::Zirconium,
210        "NB" => AtomType::Niobium,
211        "MO" => AtomType::Molybdenum,
212        "TC" => AtomType::Technetium,
213        "RU" => AtomType::Ruthenium,
214        "RH" => AtomType::Rhodium,
215        "PD" => AtomType::Palladium,
216        "AG" => AtomType::Silver,
217        "CD" => AtomType::Cadmium,
218        "IN" => AtomType::Indium,
219        "SN" => AtomType::Tin,
220        "SB" => AtomType::Antimony,
221        "TE" => AtomType::Tellurium,
222        " I" => AtomType::Iodine,
223        "XE" => AtomType::Xenon,
224        "CS" => AtomType::Cesium,
225        "BA" => AtomType::Barium,
226        "LA" => AtomType::Lanthanum,
227        "CE" => AtomType::Cerium,
228        "PR" => AtomType::Praseodymium,
229        "ND" => AtomType::Neodymium,
230        "PM" => AtomType::Promethium,
231        "SM" => AtomType::Samarium,
232        "EU" => AtomType::Europium,
233        "GD" => AtomType::Gadolinium,
234        "TB" => AtomType::Terbium,
235        "DY" => AtomType::Dysprosium,
236        "ER" => AtomType::Erbium,
237        "TM" => AtomType::Thulium,
238        "YB" => AtomType::Ytterbium,
239        "LU" => AtomType::Lutetium,
240        "TA" => AtomType::Tantalum,
241        " W" => AtomType::Tungsten,
242        "RE" => AtomType::Rhenium,
243        "OS" => AtomType::Osmium,
244        "IR" => AtomType::Iridium,
245        "PT" => AtomType::Platinum,
246        "AU" => AtomType::Gold,
247        "TL" => AtomType::Thallium,
248        "PB" => AtomType::Lead,
249        "BI" => AtomType::Bismuth,
250        "PO" => AtomType::Polonium,
251        "AT" => AtomType::Astatine,
252        "RN" => AtomType::Radon,
253        "FR" => AtomType::Francium,
254        "RA" => AtomType::Radium,
255        "AC" => AtomType::Actinium,
256        "TH" => AtomType::Thorium,
257        "PA" => AtomType::Protactinium,
258        " U" => AtomType::Uranium,
259        "NP" => AtomType::Neptunium,
260        "PU" => AtomType::Plutonium,
261        "AM" => AtomType::Americium,
262        "CM" => AtomType::Curium,
263        "BK" => AtomType::Berkelium,
264        "CF" => AtomType::Californium,
265        "ES" => AtomType::Einsteinium,
266        "FM" => AtomType::Fermium,
267        "MD" => AtomType::Mendelevium,
268        "NO" => AtomType::Nobelium,
269        "LR" => AtomType::Lawrencium,
270        "RF" => AtomType::Rutherfordium,
271        "DB" => AtomType::Dubnium,
272        "SG" => AtomType::Seaborgium,
273        "BH" => AtomType::Bohrium,
274        "MT" => AtomType::Meitnerium,
275        _ => {
276            let next_chars: Vec<char> = txt.chars().collect();
277            match next_chars[1] {
278                _ if next_chars[1].is_digit(10) => AtomType::Hydrogen,
279                _ if next_chars[2].is_digit(10) => AtomType::Hydrogen,
280                'G' => AtomType::Mercury,
281                'E' => AtomType::Helium,
282                'O' => AtomType::Holmium,
283                'F' => AtomType::Hafnium,
284                'S' => AtomType::Hassium,
285                _ => AtomType::Unknown,
286            }
287        }
288    }
289}