1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// Copyright (c) 2020 Tianyi Shi
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

//! # nom-pdb
//!
//! A PDB (Protein Data Bank) file parser implemented with nom.
//!
//! See [github repository](https://github.com/TianyiShi2001/nom-pdb) for examples.

// Submodules of the crate. Every module below is public except `types`,
// which is declared `pub(crate)` and is therefore only visible inside this crate.
pub mod common;
pub mod complete;
pub mod coordinate;
pub mod crystallography;
pub mod het;
pub mod primary_structure;
pub mod remark;
pub mod secondary_structure;
pub mod title_section;
pub(crate) mod types;

// Re-export `Parser` from the `complete` module so it is reachable at the crate root.
pub use complete::Parser;

// /// http://www.wwpdb.org/documentation/file-format-content/format33/sect1.html
// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
// enum Fields {
//     Header,  // M
//     Obslte,  // O : Mandatory in entries that have been replaced by a newer entry.
//     Title,   // M
//     Split, //   O : Mandatory when  large macromolecular complexes are split into multiple PDB entries.
//     Caveat, // O : Mandatory when there are outstanding errors such as chirality.
//     Compnd, // M
//     Source, // M
//     Keywds, // M
//     Expdta, // M
//     Nummdl, // O : Mandatory for  NMR ensemble entries.
//     Mdltyp, // O : Mandatory for  NMR minimized average Structures or when the entire  polymer chain contains C alpha or P atoms only.
//     Author, // M
//     Revdat, // M
//     Sprsde, // O : Mandatory for a replacement entry.
//     Jrnl,   //   O : Mandatory if a publication describes the experiment.
//     Remark0, // O : Mandatory for a re-refined structure
//     Remark1, // O
//     Remark2, // M
//     Remark3, // M
//     RemarkN, // O : Mandatory under certain conditions
//     Dbref,  //  O : Mandatory for all polymers.
//     Dbref1,
//     Dbref2, // O : Mandatory when certain sequence database accession and/or sequence numbering does not fit preceding DBREF format.
//     SeqAdv, // O : Mandatory if sequence  conflict exists.
//     SeqRes, // O : Mandatory if ATOM records exist.
//     Modres, // O : Mandatory if modified group exists in the coordinates.
//     Het,    // O : Mandatory if a non-standard group other than water appears in the coordinates.
//     Hetnam, // O : Mandatory if a non-standard group other than water appears in the coordinates.
//     Hetsyn, // O
//     Formul, // O : Mandatory if a non-standard group or water appears in the coordinates.
//     Helix,  // O
//     Sheet,  // O
//     Ssbond, // O : Mandatory if a  disulfide bond is present.
//     Link,   // O : Mandatory if  non-standard residues appear in a polymer
//     Cispep, // O
//     Site,   // O
//     Cryst1, // M
//     Origx1, // M
//     Origx2, // M
//     Origx3, // M
//     Scale1, // M
//     Scale2, // M
//     Scale3, // M
//     Mtrix1, // O Mandatory if  the complete asymmetric unit
//     Mtrix2, // O must  be generated from the given coordinates
//     Mtrix3, // O using non-crystallographic symmetry.
//     Model,  // O : Mandatory if more than one model is present in the entry.
//     Atom,   // O : Mandatory if standard residues exist.
//     Anisou, // O
//     Ter,    // O : Mandatory if ATOM records exist.
//     Hetatm, // O : Mandatory if non-standard group exists.
//     Endmdl, // O : Mandatory if MODEL appears.
//     Conect, // O : Mandatory if non-standard group appears and if LINK or SSBOND records exist.
//     Master, // M
//     End,    // M
// }

// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
// enum Section {
//     Title, // HEADER, OBSLTE, TITLE, SPLIT, CAVEAT, COMPND, SOURCE, KEYWDS, EXPDTA, NUMMDL, MDLTYP, AUTHOR, REVDAT, SPRSDE, JRNL
//     Remark, // REMARKs  0-999
//     PrimaryStructure, // DBREF, SEQADV, SEQRES MODRES
//     Heterogen, // HET, HETNAM, HETSYN, FORMUL
//     SecondaryStructure, // HELIX, SHEET
//     Connectivity, // CONECT, SSBOND, LINK, CISPEP
//     Misc,  // SITE
//     Crystallography, // CRYST1
//     CoordinateTransformation, // ORIGXn,  SCALEn, MTRIXn,
//     Coordinate, // MODEL, ATOM, ANISOU, TER, HETATM, ENDMDL
// }
// /// Commas, colons, and semi-colons are used as list delimiters in records that have one of the following data types:
// ///
// /// - List
// /// - SList
// /// - Specification List
// /// - Specification
// ///
// /// If a comma, colon, or semi-colon is used in any context other than as a delimiting character, then the character must be escaped, i.e., immediately preceded by a backslash, "\".
// enum Dtype {
//     // To interpret a String, concatenate the contents of all continued fields together, collapse all sequences of multiple blanks to a single blank, and remove any leading and trailing blanks. This permits very long strings to be properly reconstructed.
//     List,              // A String that is composed of text separated with commas.
//     SList,             // A String that is composed of text separated with semi-colons.
//     Specification, // A String composed of a token and its  associated value separated by a colon.
//     SpecificationList, // A sequence of Specifications, separated by semi-colons.
//     String,
//     Oneline,
// }

// use std::str::from_utf8_unchecked;

// use std::fs::read;
// use std::fs::read_to_string;
// use std::fs::File;

// pub enum Record<'a> {
//     Header(title_section::header::Header<'a>),
//     Authors(Vec<&'a [u8]>),
//     Keywords(Vec<&'a [u8]>),
//     Cryst1(crystallography::cryst1::Cryst1),
// }

// use memmap::MmapOptions;
// pub unsafe fn apply_file_content_unsafe<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
// where
//     F: FnOnce(&[u8]) -> T,
// {
//     let file = File::open(fp)?;
//     let mmap = MmapOptions::new().map(&file)?;
//     let data = from_utf8_unchecked(&mmap[..]);
//     let res = parser(data);
//     Ok(res)
// }

// pub fn apply_file_content<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
// where
//     F: FnOnce(&[u8]) -> T,
// {
//     let bytes = read(fp)?;
//     let data = unsafe { from_utf8_unchecked(&bytes) };
//     let res = parser(data);
//     Ok(res)
// }