lib3dmol/structures/
structure.rs

1use super::atom::{Atom, AtomType};
2use super::chain::{Chain, ChainTypes};
3use super::residue::Residue;
4use super::*;
5
6use selection_atom;
7
/// A [`Structure`] is the top-level container: a named collection of [`Chain`]s
/// holding molecules such as proteins, DNA, etc.
///
pub struct Structure {
    /// Name of the structure.
    pub name: String,
    /// Chains held by the structure; `add_chain` appends to this vector.
    pub chains: Vec<Chain>,
    // Name of the chain most recently given to `add_chain` (' ' right after `new`).
    last_chain_added: char,
}
15
16impl Structure {
17    /// Create a new [`Structure`]
18    ///
19    /// # Examples
20    ///
21    /// ````
22    /// use lib3dmol::structures::structure;
23    ///
24    /// let my_prot = structure::Structure::new(String::from("my_struct"));
25    ///
26    /// ````
27    pub fn new(n: String) -> Structure {
28        Structure {
29            name: n,
30            chains: Vec::new(),
31            last_chain_added: ' ',
32        }
33    }
34
35    /// Get the name of the [`Structure`]
36    ///
37    /// # Examples
38    ///
39    /// ````
40    /// use lib3dmol::structures::structure;
41    ///
42    /// let my_struct = structure::Structure::new(String::from("my_struct"));
43    ///
44    /// assert_eq!("my_struct", my_struct.name());
45    ///
46    /// ````
47    pub fn name(&self) -> &str {
48        &self.name
49    }
50
51    /// Add a new [`Chain`] in the [`Structure`]
52    ///
53    /// # Examples
54    ///
55    /// ````
56    /// use lib3dmol::structures::{structure, chain};
57    ///
58    /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
59    /// let my_chain_a = chain::Chain::new('a', chain::ChainTypes::Lipid);
60    ///
61    /// my_struct.add_chain(my_chain_a);
62    ///
63    /// assert_eq!(1, my_struct.get_chain_number());
64    ///
65    /// ````
66    pub fn add_chain(&mut self, c: Chain) {
67        self.last_chain_added = c.get_name();
68        self.chains.push(c);
69    }
70
71    /// Return True if the [`Chain`] is in the [`Structure`]
72    ///
73    /// # Examples
74    ///
75    /// ````
76    /// use lib3dmol::structures::{structure, chain};
77    ///
78    /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
79    /// let my_chain_a = chain::Chain::new('a', chain::ChainTypes::Protein);
80    /// my_struct.add_chain(my_chain_a);
81    ///
82    /// assert!(my_struct.is_chain('a'));
83    ///
84    /// ````
85    pub fn is_chain(&self, c: char) -> bool {
86        for ii in &self.chains {
87            if ii.get_name() == c {
88                return true;
89            }
90        }
91        false
92    }
93
94    /// Get the number of [`Chain`] in the [`Structure`]
95    ///
96    /// # Examples
97    ///
98    /// ````
99    /// use lib3dmol::structures::structure;
100    ///
101    /// let my_struct = structure::Structure::new(String::from("my_struct"));
102    ///
103    /// assert_eq!(0, my_struct.get_chain_number());
104    /// ````
105    pub fn get_chain_number(&self) -> usize {
106        self.chains.len()
107    }
108
109    /// Return the number of [`Residue`] in the [`Structure`]
110    ///
111    /// According to PDB format, residue can be molecules of amino acid, nucleic acid, lipid or ligand
112    ///
113    /// # Examples
114    ///
115    /// ````
116    /// use lib3dmol::structures::{structure, chain, residue};
117    ///
118    /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
119    /// let mut my_chain = chain::Chain::new('n', chain::ChainTypes::Protein);
120    /// let lys = residue::Residue::new(String::from("lysine"), 1);
121    /// let pro = residue::Residue::new(String::from("proline"), 2);
122    ///
123    /// my_chain.add_res(lys);
124    /// my_chain.add_res(pro);
125    /// my_struct.add_chain(my_chain);
126    ///
127    /// assert_eq!(2, my_struct.get_residue_number());
128    ///
129    /// ````
130    pub fn get_residue_number(&self) -> u64 {
131        let mut n: u64 = 0;
132        for chain in self.chains.iter() {
133            for _ in chain.lst_res.iter() {
134                n += 1;
135            }
136        }
137        n
138    }
139
140    /// Return the number of [`Atom`] in the [`Structure`]
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// use lib3dmol::parser;
146    ///
147    /// let my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
148    /// assert_eq!(1085, my_struct.get_atom_number());
149    /// ```
150    pub fn get_atom_number(&self) -> u64 {
151        let mut n: u64 = 0;
152        for chain in self.chains.iter() {
153            for res in chain.lst_res.iter() {
154                for _ in res.lst_atom.iter() {
155                    n += 1;
156                }
157            }
158        }
159        n
160    }
161
162    /// Generate a [`Vector`] of atoms index (u64)
163    /// Can be used in other program like rrmsd_map to select specific atoms
164    ///
165    /// # Examples
166    /// ```
167    /// use lib3dmol::parser;
168    ///
169    /// let my_struct = parser::read_pdb("tests/tests_file/f2_adn.pdb", "f2");
170    /// let atom_index = my_struct.get_atom_index();
171    ///
172    /// assert_eq!(atom_index[0], 1);
173    /// assert_eq!(atom_index[1], 2);
174    /// ```
175    pub fn get_atom_index(&self) -> Vec<u64> {
176        let mut lst: Vec<u64> = Vec::new();
177        for chain in &self.chains {
178            for res in &chain.lst_res {
179                for atom in &res.lst_atom {
180                    lst.push(atom.number);
181                }
182            }
183        }
184        lst
185    }
186
187    /// Return a mutable reference of a [`Chain`] with its name. Return None if the
188    /// [`Chain`] does not exist
189    ///
190    /// # Examples
191    ///
192    /// ````
193    /// use lib3dmol::structures::{structure, chain};
194    ///
195    /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
196    /// my_struct.add_chain(chain::Chain::new('n', chain::ChainTypes::Protein));
197    /// assert_eq!('n', my_struct.chains[0].get_name());
198    /// {
199    ///     let mut reference = my_struct.get_chain_ref('n').unwrap();
200    ///     reference.name = 'a';
201    /// }
202    /// assert_eq!('a', my_struct.chains[0].get_name());
203    /// ````
204    pub fn get_chain_ref(&mut self, c: char) -> Option<&mut Chain> {
205        for chain in &mut self.chains {
206            if chain.name == c {
207                return Some(chain);
208            }
209        }
210        None
211    }
212
213    /// Function that add information on the [`Structure`] (used in the parsing)
214    /// /!\Change this to a macro!
215    ///
216    #[warn(clippy::too_many_arguments)]
217    pub fn update_structure(
218        &mut self,
219        chain: char,
220        res_name: String,
221        res_number: u64,
222        res_icode: Option<char>,
223        atom_name: String,
224        atom_number: u64,
225        a_type: AtomType,
226        coord: [f32; 3],
227        occupancy: Option<f32>,
228        tmp_factor: Option<f32>,
229        element: Option<String>,
230        charge: Option<String>,
231    ) {
232        // Get a chain reference. If the chain exist, return a mutable reference to it. If not,
233        // create a new chain an return the mutable reference
234        let chain = match self.get_chain_ref(chain) {
235            Some(c) => c,
236            None => {
237                let chain_type = ChainTypes::get(&res_name.to_uppercase()[..]);
238                self.add_chain(Chain::new(chain, chain_type));
239                self.get_chain_ref(chain).unwrap()
240            }
241        };
242
243        // Get a residue reference. If the residue exist, return a mutable reference to it. If not,
244        // create a new residue and return it as mutable reference
245        let residue = match chain.get_residue_ref(res_number as u64, res_icode) {
246            Some(r) => r,
247            None => {
248                chain.add_res(Residue{name: res_name, res_num: res_number, res_icode: res_icode, lst_atom: Vec::new() });
249                chain.get_residue_ref(res_number, res_icode).unwrap()
250            }
251        };
252
253        let atom = Atom::new_complete(
254            atom_name,
255            atom_number,
256            coord,
257            a_type,
258            occupancy,
259            tmp_factor,
260            element,
261            charge,
262        );
263        residue.add_atom(atom);
264    }
265
266    /// Select [`Atom`] from a pattern and return a new [`Structure`]
267    ///
268    /// The pattern could use keywords "Chain", "Resid" or "Backbone" (keyword are not case sensitive)
269    ///
270    /// ## "Chain"
271    /// The Chain keyword is used to select chain. It must be follow by one or two chain names separate by the "to" keyword.
272    /// The chain name is case sensitive.
273    /// examples:
274    /// "Chain A" will select only the Chain A.
275    /// "Chain A to D" will select chains A, B, C and D.
276    ///
277    /// ## "Resid"
278    /// The Resid keyword is used to select residues. It must be follow by one or two chain names separate by the "to" keyword.
279    /// In case where the protein has multiple chains, the Resid will return residue(s) for all chains.
280    /// examples:
281    /// "Resid 1" will select only the residue 1 of each chain
282    /// "Resid 12 to 50" will select residues 12, 13, .., 50 for all chains
283    ///
284    /// ## "Backbone"
285    /// The Backbone keyword is used to select atoms in the backbone for each residues. It don't take parameters.
286    ///
287    /// ## Special keyword "and"
288    /// You can use the keyword "and" to separate 2 or more differents selection.
289    /// examples:
290    /// "Chain A and Resid 40 to 150"
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use lib3dmol::parser;
296    ///
297    /// let my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
298    ///
299    /// assert_eq!(66, my_struct.get_residue_number());
300    /// assert_eq!(1085, my_struct.get_atom_number());
301    ///
302    /// let prot_backbone = my_struct.select_atoms("resid 10 to 50 and backbone").unwrap();
303    ///
304    /// assert_eq!(41, prot_backbone.get_residue_number());
305    /// assert_eq!(164, prot_backbone.get_atom_number());
306    /// ```
307    // TODO: The methode is idiot and need to be improve.
308    // ex: don't parse the chain if it's not selected
309    pub fn select_atoms(&self, pattern: &str) -> Option<Structure> {
310        let mut new_struct = Structure::new(self.name.clone());
311
312        let select = match selection_atom::parse_select(&pattern) {
313            Some(x) => x,
314            None => {
315                println!("Can't parse the protein with these attributes");
316                return None;
317            }
318        };
319        for chain in &self.chains {
320            let c_chain = chain.name;
321            for residue in &chain.lst_res {
322                let c_res = residue.res_num;
323                for atom in &residue.lst_atom {
324                    if selection_atom::atom_match(&select, c_chain, c_res, atom.is_backbone) {
325                        new_struct.update_structure(
326                            c_chain,
327                            residue.name.clone(),
328                            c_res,
329                            residue.res_icode,
330                            atom.name.clone(),
331                            atom.number,
332                            atom.a_type.clone(),
333                            atom.coord,
334                            atom.occupancy,
335                            atom.temp_factor,
336                            atom.element.clone(),
337                            atom.charge.clone(),
338                        );
339                    }
340                }
341            }
342        }
343        Some(new_struct)
344    }
345
346    /// Used to clean the dialing of [`Atom`] in the [`Structure`]
347    /// Does not change the dialing of residues.
348    ///
349    ///
350    /// # Examples
351    /// ```
352    /// use lib3dmol::parser;
353    ///
354    /// let my_prot = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
355    /// let mut backbone = my_prot.select_atoms("backbone").unwrap();
356    /// let lst_atom_id = backbone.get_atom_index();
357    /// assert_eq!(1, lst_atom_id[0]);
358    /// assert_eq!(5, lst_atom_id[1]);
359    ///
360    /// backbone.refine_atom_numbering();
361    /// let lst_atom_id = backbone.get_atom_index();
362    /// assert_eq!(1, lst_atom_id[0]);
363    /// assert_eq!(2, lst_atom_id[1]);
364    /// ```
365    pub fn refine_atom_numbering(&mut self) {
366        let mut n_atom = 1;
367        for chain in &mut self.chains {
368            for residue in &mut chain.lst_res {
369                for atom in &mut residue.lst_atom {
370                    atom.number = n_atom;
371                    n_atom += 1;
372                }
373            }
374        }
375    }
376
377    /// Used to remove Hydrogens in the proteins
378    ///
379    ///
380    /// # Examples
381    /// ```
382    /// use lib3dmol::parser;
383    ///
384    /// let mut my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
385    /// assert_eq!(1085, my_struct.get_atom_number());
386    /// my_struct.remove_h();
387    /// assert_eq!(541, my_struct.get_atom_number());
388    /// ```
389    pub fn remove_h(&mut self) {
390        for chain in &mut self.chains {
391            for residue in &mut chain.lst_res {
392                for index in (0..residue.lst_atom.len()).rev() {
393                    match residue.lst_atom[index].a_type {
394                        AtomType::Hydrogen => residue.remove_atom(residue.lst_atom[index].number),
395                        _ => (),
396                    }
397                }
398            }
399        }
400        self.refine_atom_numbering();
401    }
402}
403
404impl GetAtom for Structure {
405    fn get_atom(&self) -> Vec<&Atom> {
406        let mut lst_atom: Vec<&Atom> = Vec::new();
407        for chain in &self.chains {
408            for res in &chain.lst_res {
409                for atom in &res.lst_atom {
410                    lst_atom.push(&atom)
411                }
412            }
413        }
414        lst_atom
415    }
416    fn compute_weight(&self) -> f32 {
417        self.get_atom().iter().map(|x| x.get_weight()).sum()
418    }
419}