nom_pdb/
lib.rs

1// Copyright (c) 2020 Tianyi Shi
2//
3// This software is released under the MIT License.
4// https://opensource.org/licenses/MIT
5
6//! # nom-pdb
7//!
8//! A PDB (Protein Data Bank) file parser implemented with nom.
9//!
10//! See [github repository](https://github.com/TianyiShi2001/nom-pdb) for examples.
11
12pub mod common;
13pub mod complete;
14pub mod coordinate;
15pub mod crystallography;
16pub mod het;
17pub mod primary_structure;
18pub mod remark;
19pub mod secondary_structure;
20pub mod title_section;
21pub(crate) mod types;
22
23pub use complete::Parser;
24
25// /// http://www.wwpdb.org/documentation/file-format-content/format33/sect1.html
26// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
27// enum Fields {
28//     Header,  // M
29//     Obslte,  // O : Mandatory in entries that have been replaced by a newer entry.
30//     Title,   // M
31//     Split, //   O : Mandatory when  large macromolecular complexes are split into multiple PDB entries.
32//     Caveat, // O : Mandatory when there are outstanding errors such as chirality.
33//     Compnd, // M
34//     Source, // M
35//     Keywds, // M
36//     Expdta, // M
37//     Nummdl, // O : Mandatory for  NMR ensemble entries.
38//     Mdltyp, // O : Mandatory for  NMR minimized average Structures or when the entire  polymer chain contains C alpha or P atoms only.
39//     Author, // M
40//     Revdat, // M
41//     Sprsde, // O : Mandatory for a replacement entry.
42//     Jrnl,   //   O: Mandatory when a publication describes the experiment.
43//     Remark0, // O : Mandatory for a re-refined structure
44//     Remark1, // O
45//     Remark2, // M
46//     Remark3, // M
47//     RemarkN, // O : Mandatory under certain conditions
48//     Dbref,  //  O : Mandatory for all polymers.
49//     Dbref1,
50//     Dbref2, // O : Mandatory when certain sequence database accession and/or sequence numbering does not fit preceding DBREF format.
51//     SeqAdv, // O : Mandatory if sequence  conflict exists.
52//     SeqRes, // O : Mandatory if ATOM records exist.
53//     Modres, // O : Mandatory if modified group exists in the coordinates.
54//     Het,    // O : Mandatory if a non-standard group other than water appears in the coordinates.
55//     Hetnam, // O : Mandatory if a non-standard group other than water appears in the coordinates.
56//     Hetsyn, // O
57//     Formul, // O : Mandatory if a non-standard group or water appears in the coordinates.
58//     Helix,  // O
59//     Sheet,  // O
60//     Ssbond, // O : Mandatory if a  disulfide bond is present.
61//     Link,   // O : Mandatory if  non-standard residues appear in a polymer
62//     Cispep, // O
63//     Site,   // O
64//     Cryst1, // M
65//     Origx1, // M
66//     Origx2, // M
67//     Origx3, // M
68//     Scale1, // M
69//     Scale2, // M
70//     Scale3, // M
71//     Mtrix1, // O Mandatory if  the complete asymmetric unit
72//     Mtrix2, // O must  be generated from the given coordinates
73//     Mtrix3, // O using non-crystallographic symmetry.
74//     Model,  // O : Mandatory if more than one model is present in the entry.
75//     Atom,   // O : Mandatory if standard residues exist.
76//     Anisou, // O
77//     Ter,    // O : Mandatory if ATOM records exist.
78//     Hetatm, // O : Mandatory if non-standard group exists.
79//     Endmdl, // O : Mandatory if MODEL appears.
80//     Conect, // O : Mandatory if non-standard group appears and if LINK or SSBOND records exist.
81//     Master, // M
82//     End,    // M
83// }
84
85// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
86// enum Section {
87//     Title, // HEADER, OBSLTE, TITLE, SPLIT, CAVEAT, COMPND, SOURCE, KEYWDS, EXPDTA, NUMMDL, MDLTYP, AUTHOR, REVDAT, SPRSDE, JRNL
88//     Remark, // REMARKs  0-999
89//     PrimaryStructure, // DBREF, SEQADV, SEQRES, MODRES
90//     Heterogen, // HET, HETNAM, HETSYN, FORMUL
91//     SecondaryStructure, // HELIX, SHEET
92//     Connectivity, // CONECT, SSBOND, LINK, CISPEP
93//     Misc,  // SITE
94//     Crystallography, // CRYST1
95//     CoordinateTransformation, // ORIGXn,  SCALEn, MTRIXn,
96//     Coordinate, // MODEL, ATOM, ANISOU, TER, HETATM, ENDMDL
97// }
98// /// Commas, colons, and semi-colons are used as list delimiters in records that have one of the following data types:
99// ///
100// /// - List
101// /// - SList
102// /// - Specification List
103// /// - Specification
104// ///
105// /// If a comma, colon, or semi-colon is used in any context other than as a delimiting character, then the character must be escaped, i.e., immediately preceded by a backslash, "\".
106// enum Dtype {
107//     // To interpret a String, concatenate the contents of all continued fields together, collapse all sequences of multiple blanks to a single blank, and remove any leading and trailing blanks. This permits very long strings to be properly reconstructed.
108//     List,              // A String that is composed of text separated with commas.
109//     SList,             // A String that is composed of text separated with semi-colons.
110//     Specification, // A String composed of a token and its  associated value separated by a colon.
111//     SpecificationList, // A sequence of Specifications, separated by semi-colons.
112//     String,
113//     Oneline,
114// }
115
116// use std::str::from_utf8_unchecked;
117
118// use std::fs::read;
119// use std::fs::read_to_string;
120// use std::fs::File;
121
122// pub enum Record<'a> {
123//     Header(title_section::header::Header<'a>),
124//     Authors(Vec<&'a [u8]>),
125//     Keywords(Vec<&'a [u8]>),
126//     Cryst1(crystallography::cryst1::Cryst1),
127// }
128
129// use memmap::MmapOptions;
130// pub unsafe fn apply_file_content_unsafe<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
131// where
132//     F: FnOnce(&[u8]) -> T,
133// {
134//     let file = File::open(fp)?;
135//     let mmap = MmapOptions::new().map(&file)?;
136//     let data = from_utf8_unchecked(&mmap[..]);
137//     let res = parser(data);
138//     Ok(res)
139// }
140
141// pub fn apply_file_content<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
142// where
143//     F: FnOnce(&[u8]) -> T,
144// {
145//     let bytes = read(fp)?;
146//     let data = unsafe { from_utf8_unchecked(&bytes) };
147//     let res = parser(data);
148//     Ok(res)
149// }