lib3dmol/structures/structure.rs
1use super::atom::{Atom, AtomType};
2use super::chain::{Chain, ChainTypes};
3use super::residue::Residue;
4use super::*;
5
6use selection_atom;
7
8/// A [`Structure`] is the SUPER structure which contain molecules like Protein, DNA, etc.
9///
10pub struct Structure {
11 pub name: String,
12 pub chains: Vec<Chain>,
13 last_chain_added: char,
14}
15
16impl Structure {
17 /// Create a new [`Structure`]
18 ///
19 /// # Examples
20 ///
21 /// ````
22 /// use lib3dmol::structures::structure;
23 ///
24 /// let my_prot = structure::Structure::new(String::from("my_struct"));
25 ///
26 /// ````
27 pub fn new(n: String) -> Structure {
28 Structure {
29 name: n,
30 chains: Vec::new(),
31 last_chain_added: ' ',
32 }
33 }
34
35 /// Get the name of the [`Structure`]
36 ///
37 /// # Examples
38 ///
39 /// ````
40 /// use lib3dmol::structures::structure;
41 ///
42 /// let my_struct = structure::Structure::new(String::from("my_struct"));
43 ///
44 /// assert_eq!("my_struct", my_struct.name());
45 ///
46 /// ````
47 pub fn name(&self) -> &str {
48 &self.name
49 }
50
51 /// Add a new [`Chain`] in the [`Structure`]
52 ///
53 /// # Examples
54 ///
55 /// ````
56 /// use lib3dmol::structures::{structure, chain};
57 ///
58 /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
59 /// let my_chain_a = chain::Chain::new('a', chain::ChainTypes::Lipid);
60 ///
61 /// my_struct.add_chain(my_chain_a);
62 ///
63 /// assert_eq!(1, my_struct.get_chain_number());
64 ///
65 /// ````
66 pub fn add_chain(&mut self, c: Chain) {
67 self.last_chain_added = c.get_name();
68 self.chains.push(c);
69 }
70
71 /// Return True if the [`Chain`] is in the [`Structure`]
72 ///
73 /// # Examples
74 ///
75 /// ````
76 /// use lib3dmol::structures::{structure, chain};
77 ///
78 /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
79 /// let my_chain_a = chain::Chain::new('a', chain::ChainTypes::Protein);
80 /// my_struct.add_chain(my_chain_a);
81 ///
82 /// assert!(my_struct.is_chain('a'));
83 ///
84 /// ````
85 pub fn is_chain(&self, c: char) -> bool {
86 for ii in &self.chains {
87 if ii.get_name() == c {
88 return true;
89 }
90 }
91 false
92 }
93
94 /// Get the number of [`Chain`] in the [`Structure`]
95 ///
96 /// # Examples
97 ///
98 /// ````
99 /// use lib3dmol::structures::structure;
100 ///
101 /// let my_struct = structure::Structure::new(String::from("my_struct"));
102 ///
103 /// assert_eq!(0, my_struct.get_chain_number());
104 /// ````
105 pub fn get_chain_number(&self) -> usize {
106 self.chains.len()
107 }
108
109 /// Return the number of [`Residue`] in the [`Structure`]
110 ///
111 /// According to PDB format, residue can be molecules of amino acid, nucleic acid, lipid or ligand
112 ///
113 /// # Examples
114 ///
115 /// ````
116 /// use lib3dmol::structures::{structure, chain, residue};
117 ///
118 /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
119 /// let mut my_chain = chain::Chain::new('n', chain::ChainTypes::Protein);
120 /// let lys = residue::Residue::new(String::from("lysine"), 1);
121 /// let pro = residue::Residue::new(String::from("proline"), 2);
122 ///
123 /// my_chain.add_res(lys);
124 /// my_chain.add_res(pro);
125 /// my_struct.add_chain(my_chain);
126 ///
127 /// assert_eq!(2, my_struct.get_residue_number());
128 ///
129 /// ````
130 pub fn get_residue_number(&self) -> u64 {
131 let mut n: u64 = 0;
132 for chain in self.chains.iter() {
133 for _ in chain.lst_res.iter() {
134 n += 1;
135 }
136 }
137 n
138 }
139
140 /// Return the number of [`Atom`] in the [`Structure`]
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// use lib3dmol::parser;
146 ///
147 /// let my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
148 /// assert_eq!(1085, my_struct.get_atom_number());
149 /// ```
150 pub fn get_atom_number(&self) -> u64 {
151 let mut n: u64 = 0;
152 for chain in self.chains.iter() {
153 for res in chain.lst_res.iter() {
154 for _ in res.lst_atom.iter() {
155 n += 1;
156 }
157 }
158 }
159 n
160 }
161
162 /// Generate a [`Vector`] of atoms index (u64)
163 /// Can be used in other program like rrmsd_map to select specific atoms
164 ///
165 /// # Examples
166 /// ```
167 /// use lib3dmol::parser;
168 ///
169 /// let my_struct = parser::read_pdb("tests/tests_file/f2_adn.pdb", "f2");
170 /// let atom_index = my_struct.get_atom_index();
171 ///
172 /// assert_eq!(atom_index[0], 1);
173 /// assert_eq!(atom_index[1], 2);
174 /// ```
175 pub fn get_atom_index(&self) -> Vec<u64> {
176 let mut lst: Vec<u64> = Vec::new();
177 for chain in &self.chains {
178 for res in &chain.lst_res {
179 for atom in &res.lst_atom {
180 lst.push(atom.number);
181 }
182 }
183 }
184 lst
185 }
186
187 /// Return a mutable reference of a [`Chain`] with its name. Return None if the
188 /// [`Chain`] does not exist
189 ///
190 /// # Examples
191 ///
192 /// ````
193 /// use lib3dmol::structures::{structure, chain};
194 ///
195 /// let mut my_struct = structure::Structure::new(String::from("my_struct"));
196 /// my_struct.add_chain(chain::Chain::new('n', chain::ChainTypes::Protein));
197 /// assert_eq!('n', my_struct.chains[0].get_name());
198 /// {
199 /// let mut reference = my_struct.get_chain_ref('n').unwrap();
200 /// reference.name = 'a';
201 /// }
202 /// assert_eq!('a', my_struct.chains[0].get_name());
203 /// ````
204 pub fn get_chain_ref(&mut self, c: char) -> Option<&mut Chain> {
205 for chain in &mut self.chains {
206 if chain.name == c {
207 return Some(chain);
208 }
209 }
210 None
211 }
212
213 /// Function that add information on the [`Structure`] (used in the parsing)
214 /// /!\Change this to a macro!
215 ///
216 #[warn(clippy::too_many_arguments)]
217 pub fn update_structure(
218 &mut self,
219 chain: char,
220 res_name: String,
221 res_number: u64,
222 res_icode: Option<char>,
223 atom_name: String,
224 atom_number: u64,
225 a_type: AtomType,
226 coord: [f32; 3],
227 occupancy: Option<f32>,
228 tmp_factor: Option<f32>,
229 element: Option<String>,
230 charge: Option<String>,
231 ) {
232 // Get a chain reference. If the chain exist, return a mutable reference to it. If not,
233 // create a new chain an return the mutable reference
234 let chain = match self.get_chain_ref(chain) {
235 Some(c) => c,
236 None => {
237 let chain_type = ChainTypes::get(&res_name.to_uppercase()[..]);
238 self.add_chain(Chain::new(chain, chain_type));
239 self.get_chain_ref(chain).unwrap()
240 }
241 };
242
243 // Get a residue reference. If the residue exist, return a mutable reference to it. If not,
244 // create a new residue and return it as mutable reference
245 let residue = match chain.get_residue_ref(res_number as u64, res_icode) {
246 Some(r) => r,
247 None => {
248 chain.add_res(Residue{name: res_name, res_num: res_number, res_icode: res_icode, lst_atom: Vec::new() });
249 chain.get_residue_ref(res_number, res_icode).unwrap()
250 }
251 };
252
253 let atom = Atom::new_complete(
254 atom_name,
255 atom_number,
256 coord,
257 a_type,
258 occupancy,
259 tmp_factor,
260 element,
261 charge,
262 );
263 residue.add_atom(atom);
264 }
265
266 /// Select [`Atom`] from a pattern and return a new [`Structure`]
267 ///
268 /// The pattern could use keywords "Chain", "Resid" or "Backbone" (keyword are not case sensitive)
269 ///
270 /// ## "Chain"
271 /// The Chain keyword is used to select chain. It must be follow by one or two chain names separate by the "to" keyword.
272 /// The chain name is case sensitive.
273 /// examples:
274 /// "Chain A" will select only the Chain A.
275 /// "Chain A to D" will select chains A, B, C and D.
276 ///
277 /// ## "Resid"
278 /// The Resid keyword is used to select residues. It must be follow by one or two chain names separate by the "to" keyword.
279 /// In case where the protein has multiple chains, the Resid will return residue(s) for all chains.
280 /// examples:
281 /// "Resid 1" will select only the residue 1 of each chain
282 /// "Resid 12 to 50" will select residues 12, 13, .., 50 for all chains
283 ///
284 /// ## "Backbone"
285 /// The Backbone keyword is used to select atoms in the backbone for each residues. It don't take parameters.
286 ///
287 /// ## Special keyword "and"
288 /// You can use the keyword "and" to separate 2 or more differents selection.
289 /// examples:
290 /// "Chain A and Resid 40 to 150"
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use lib3dmol::parser;
296 ///
297 /// let my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
298 ///
299 /// assert_eq!(66, my_struct.get_residue_number());
300 /// assert_eq!(1085, my_struct.get_atom_number());
301 ///
302 /// let prot_backbone = my_struct.select_atoms("resid 10 to 50 and backbone").unwrap();
303 ///
304 /// assert_eq!(41, prot_backbone.get_residue_number());
305 /// assert_eq!(164, prot_backbone.get_atom_number());
306 /// ```
307 // TODO: The methode is idiot and need to be improve.
308 // ex: don't parse the chain if it's not selected
309 pub fn select_atoms(&self, pattern: &str) -> Option<Structure> {
310 let mut new_struct = Structure::new(self.name.clone());
311
312 let select = match selection_atom::parse_select(&pattern) {
313 Some(x) => x,
314 None => {
315 println!("Can't parse the protein with these attributes");
316 return None;
317 }
318 };
319 for chain in &self.chains {
320 let c_chain = chain.name;
321 for residue in &chain.lst_res {
322 let c_res = residue.res_num;
323 for atom in &residue.lst_atom {
324 if selection_atom::atom_match(&select, c_chain, c_res, atom.is_backbone) {
325 new_struct.update_structure(
326 c_chain,
327 residue.name.clone(),
328 c_res,
329 residue.res_icode,
330 atom.name.clone(),
331 atom.number,
332 atom.a_type.clone(),
333 atom.coord,
334 atom.occupancy,
335 atom.temp_factor,
336 atom.element.clone(),
337 atom.charge.clone(),
338 );
339 }
340 }
341 }
342 }
343 Some(new_struct)
344 }
345
346 /// Used to clean the dialing of [`Atom`] in the [`Structure`]
347 /// Does not change the dialing of residues.
348 ///
349 ///
350 /// # Examples
351 /// ```
352 /// use lib3dmol::parser;
353 ///
354 /// let my_prot = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
355 /// let mut backbone = my_prot.select_atoms("backbone").unwrap();
356 /// let lst_atom_id = backbone.get_atom_index();
357 /// assert_eq!(1, lst_atom_id[0]);
358 /// assert_eq!(5, lst_atom_id[1]);
359 ///
360 /// backbone.refine_atom_numbering();
361 /// let lst_atom_id = backbone.get_atom_index();
362 /// assert_eq!(1, lst_atom_id[0]);
363 /// assert_eq!(2, lst_atom_id[1]);
364 /// ```
365 pub fn refine_atom_numbering(&mut self) {
366 let mut n_atom = 1;
367 for chain in &mut self.chains {
368 for residue in &mut chain.lst_res {
369 for atom in &mut residue.lst_atom {
370 atom.number = n_atom;
371 n_atom += 1;
372 }
373 }
374 }
375 }
376
377 /// Used to remove Hydrogens in the proteins
378 ///
379 ///
380 /// # Examples
381 /// ```
382 /// use lib3dmol::parser;
383 ///
384 /// let mut my_struct = parser::read_pdb("tests/tests_file/f2.pdb", "f2");
385 /// assert_eq!(1085, my_struct.get_atom_number());
386 /// my_struct.remove_h();
387 /// assert_eq!(541, my_struct.get_atom_number());
388 /// ```
389 pub fn remove_h(&mut self) {
390 for chain in &mut self.chains {
391 for residue in &mut chain.lst_res {
392 for index in (0..residue.lst_atom.len()).rev() {
393 match residue.lst_atom[index].a_type {
394 AtomType::Hydrogen => residue.remove_atom(residue.lst_atom[index].number),
395 _ => (),
396 }
397 }
398 }
399 }
400 self.refine_atom_numbering();
401 }
402}
403
404impl GetAtom for Structure {
405 fn get_atom(&self) -> Vec<&Atom> {
406 let mut lst_atom: Vec<&Atom> = Vec::new();
407 for chain in &self.chains {
408 for res in &chain.lst_res {
409 for atom in &res.lst_atom {
410 lst_atom.push(&atom)
411 }
412 }
413 }
414 lst_atom
415 }
416 fn compute_weight(&self) -> f32 {
417 self.get_atom().iter().map(|x| x.get_weight()).sum()
418 }
419}