nom_pdb/
het.rs

1// Copyright (c) 2020 Tianyi Shi
2//
3// This software is released under the MIT License.
4// https://opensource.org/licenses/MIT
5
6use crate::common::parser::FieldParser;
7use crate::common::parser::{
8    jump_newline, parse_residue, parse_right, take_trim_start_own, FieldParserWithModifiedTable,
9};
10use crate::types::{
11    AtomName, Helix, HelixClass, ModifiedAminoAcid, ModifiedNucleotide, Registration,
12    ResidueSerial, SecondaryStructureSerial, Sense, Sheet, Strand,
13};
14use nom::{bytes::complete::take, character::complete::anychar, combinator::map, IResult};
15use std::collections::HashMap;
16
17/// HET records are used to describe non-standard residues, such as prosthetic groups, inhibitors, solvent molecules, and ions for which coordinates are supplied. Groups are considered HET if they are not part of a biological polymer described in SEQRES and are considered to be a molecule bound to the polymer, or they are a chemical species that constitutes part of a biological polymer and is not one of the following:
18///
19/// - standard amino acids, or
20/// - standard nucleic acids (C, G, A, U, I, DC, DG, DA, DU, DT and DI), or
21/// - unknown amino acid (UNK) or nucleic acid (N) where UNK and N are used to indicate the unknown residue name.
22///
23/// HET records also describe chemical components for which the chemical identity is unknown, in which case the group is assigned the hetID UNL (Unknown Ligand).
24///
25/// The heterogen section of a PDB formatted file contains the complete description of non-standard residues in the entry.
26///
27/// ## Record Format
28///
29/// | COLUMNS | DATA  TYPE  | FIELD                 | DEFINITION                             |
30/// | ------- | ----------- | --------------------- | -------------------------------------- |
31/// | 1 -  6  | Record name | "HET   "              |                                        |
32/// | 8 - 10  | LString(3)  | hetID                 | Het identifier, right-justified.       |
33/// | 13      | Character   | ChainID               | Chain  identifier.                     |
34/// | 14 - 17 | Integer     | seqNum                | Sequence  number.                      |
35/// | 18      | AChar       | iCode                 | Insertion  code.                       |
36/// | 21 - 25 | Integer     | numHetAtoms           | Number of HETATM records for the group |
37/// |         |             |                       | present in the entry.                  |
38/// | 31 - 70 | String      | text                  | Text describing Het group.             |
39///
40/// ## Details
41///
42/// - Each HET group is assigned a hetID of not more than three (3) alphanumeric characters. The sequence number, chain identifier, insertion code, and number of coordinate records are given for each occurrence of the HET group in the entry. The chemical name of the HET group is given in the HETNAM record and synonyms for the chemical name are given in the HETSYN records, see ftp://ftp.wwpdb.org/pub/pdb/data/monomers .
43/// - There is a separate HET record for each occurrence of the HET group in an entry.
44/// - A particular HET group is represented in the PDB archive with a unique hetID.
45/// - PDB entries do not have HET records for water molecules, deuterated water, or methanol (when used as solvent).
46/// - Unknown atoms or ions will be represented as UNX with the chemical formula X1.  Unknown ligands are UNL; unknown amino acids are UNK.
47///
48/// ## Verification/Validation/Value Authority Control
49///
50/// For each het group that appears in the entry, the wwPDB checks that the corresponding HET, HETNAM, HETSYN, FORMUL, HETATM, and CONECT records appear, if applicable. The HET record is generated automatically using the Chemical Component Dictionary and information from the HETATM records.
51///
52/// Each unique hetID represents a unique molecule.
53///
54/// ## Relationships to Other Record Types
55///
56/// For each het group that appears in the entry, there must be corresponding HET, HETNAM, HETSYN, FORMUL, HETATM, and CONECT records. LINK records may also be created.
57///
58/// ## Example
59///
60/// ```ignore
61///          1         2         3         4         5         6         7         8
62/// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
63/// HET     ZN  A  31       1
64/// HET    TRS  B 975       8
65///
66/// HET    UDP  A1457      25
67/// HET    B3P  A1458      19
68///
69/// HET    NAG  Y   3      15
70/// HET    FUC  Y   4      10
71/// HET    NON  Y   5      12
72/// HET    UNK  A 161       1
73/// ```
74struct HetParser; // TODO(review): is this useful? Only the unfinished, commented-out sketch below exists in the visible code.
75                  // impl FieldParserWithModifiedTable for HetParser {
76                  //     type Output = ();
77                  //     fn parse<'a>(inp: &'a [u8], modified_aa: &ModifiedAminoAcidTable,
78                  //         modified_nuc: &ModifiedNucleotideTable,) -> IResult<&'a [u8], ()> {
79                  //         let inp = &inp[1..];
80                  //         let (inp, res) = parse_residue(inp, &modified_aa, &modified_nuc)
81                  //     }
82                  // }
83
84/// # HETNAM
85///
86/// ## Overview
87///
88/// This record gives the chemical name of the compound with the given hetID.
89///
90/// ## Record Format
91///
92/// | COLUMNS | DATA  TYPE   | FIELD        | DEFINITION                                |
93/// | ------- | ------------ | ------------ | ----------------------------------------- |
94/// | 1 -  6  | Record name  | "HETNAM"     |                                           |
95/// | 9 - 10  | Continuation | continuation | Allows concatenation of multiple records. |
96/// | 12 - 14 | LString(3)   | hetID        | Het identifier, right-justified.          |
97/// | 16 - 70 | String       | text         | Chemical name.                            |
98///
99/// ## Details
100///
101/// - Each hetID is assigned a unique chemical name for the HETNAM record, see ftp://ftp.wwpdb.org/pub/pdb/data/monomers.
102/// - Other names for the group are given on HETSYN records.
103/// - PDB entries follow IUPAC/IUB naming conventions to describe groups systematically.
104/// - The special character “~” is used to indicate superscript in a heterogen name. For example: N6 will be listed in the HETNAM section as N~6~, with the ~ character indicating both the start and end of the superscript in the name, e.g., `N-(BENZYLSULFONYL)SERYL-N~1~-{4-[AMINO(IMINO)METHYL]BENZYL}GLYCINAMIDE`
105///
106/// Continuation of chemical names onto subsequent records is allowed.
107/// Only one HETNAM record is included for a given hetID, even if the same hetID appears on more than one HET record.
108///
109/// ## Verification/Validation/Value Authority Control
110///
111/// For each het group that appears in the entry, the corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records must appear. The HETNAM record is generated automatically using the Chemical Component Dictionary and information from HETATM records.
112///
113/// ## Relationships to Other Record Types
114///
115/// For each het group that appears in the entry, there must be corresponding HET, HETNAM, FORMUL, HETATM, and CONECT records. HETSYN and LINK records may also be created.
116///
117/// ## Example
118///
119/// ```ignore
120///          1         2         3         4         5         6         7         8
121/// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
122/// HETNAM     NAG N-ACETYL-D-GLUCOSAMINE
123/// HETNAM     SAD BETA-METHYLENE SELENAZOLE-4-CARBOXAMIDE ADENINE
124/// HETNAM  2  SAD DINUCLEOTIDE
125///
126/// HETNAM     UDP URIDINE-5'-DIPHOSPHATE
127///
128/// HETNAM     UNX UNKNOWN ATOM OR ION
129/// HETNAM     UNL UNKNOWN LIGAND
130///
131/// HETNAM     B3P 2-[3-(2-HYDROXY-1,1-DIHYDROXYMETHYL-ETHYLAMINO)-
132/// HETNAM   2 B3P  PROPYLAMINO]-2-HYDROXYMETHYL-PROPANE-1,3-DIOL
133/// ```
134pub struct HetnamParser;
135
// NOTE(review): no `impl` for this type appears in the visible part of the file.
/// Field-less unit struct; going by its name it is presumably the parser
/// marker for FORMUL records — TODO confirm and document the record format.
136pub struct FormulParser;