nom_pdb/lib.rs
1// Copyright (c) 2020 Tianyi Shi
2//
3// This software is released under the MIT License.
4// https://opensource.org/licenses/MIT
5
6//! # nom-pdb
7//!
8//! A PDB (Protein Data Bank) file parser implemented with nom.
9//!
10//! See the [GitHub repository](https://github.com/TianyiShi2001/nom-pdb) for examples.
11
12pub mod common;
13pub mod complete;
14pub mod coordinate;
15pub mod crystallography;
16pub mod het;
17pub mod primary_structure;
18pub mod remark;
19pub mod secondary_structure;
20pub mod title_section;
21pub(crate) mod types;
22
23pub use complete::Parser;
24
25// /// http://www.wwpdb.org/documentation/file-format-content/format33/sect1.html
26// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
27// enum Fields {
28// Header, // M
29// Obslte, // O : Mandatory in entries that have been replaced by a newer entry.
30// Title, // M
31// Split, // O : Mandatory when large macromolecular complexes are split into multiple PDB entries.
32// Caveat, // O : Mandatory when there are outstanding errors such as chirality.
33// Compnd, // M
34// Source, // M
35// Keywds, // M
36// Expdta, // M
37// Nummdl, // O : Mandatory for NMR ensemble entries.
38// Mdltyp, // O : Mandatory for NMR minimized average structures or when the entire polymer chain contains C alpha or P atoms only.
39// Author, // M
40// Revdat, // M
41// Sprsde, // O : Mandatory for a replacement entry.
42// Jrnl, // O: Mandatory if a publication describes the experiment.
43// Remark0, // O : Mandatory for a re-refined structure
44// Remark1, // O
45// Remark2, // M
46// Remark3, // M
47// RemarkN, // O : Mandatory under certain conditions
48// Dbref, // O : Mandatory for all polymers.
49// Dbref1,
50// Dbref2, // O : Mandatory when certain sequence database accession and/or sequence numbering does not fit preceding DBREF format.
51// SeqAdv, // O : Mandatory if sequence conflict exists.
52// SeqRes, // O : Mandatory if ATOM records exist.
53// Modres, // O : Mandatory if modified group exists in the coordinates.
54// Het, // O : Mandatory if a non-standard group other than water appears in the coordinates.
55// Hetnam, // O : Mandatory if a non-standard group other than water appears in the coordinates.
56// Hetsyn, // O
57// Formul, // O : Mandatory if a non-standard group or water appears in the coordinates.
58// Helix, // O
59// Sheet, // O
60// Ssbond, // O : Mandatory if a disulfide bond is present.
61// Link, // O : Mandatory if non-standard residues appear in a polymer
62// Cispep, // O
63// Site, // O
64// Cryst1, // M
65// Origx1, // M
66// Origx2, // M
67// Origx3, // M
68// Scale1, // M
69// Scale2, // M
70// Scale3, // M
71// Mtrix1, // O Mandatory if the complete asymmetric unit
72// Mtrix2, // O must be generated from the given coordinates
73// Mtrix3, // O using non-crystallographic symmetry.
74// Model, // O : Mandatory if more than one model is present in the entry.
75// Atom, // O : Mandatory if standard residues exist.
76// Anisou, // O
77// Ter, // O : Mandatory if ATOM records exist.
78// Hetatm, // O : Mandatory if non-standard group exists.
79// Endmdl, // O : Mandatory if MODEL appears.
80// Conect, // O : Mandatory if non-standard group appears and if LINK or SSBOND records exist.
81// Master, // M
82// End, // M
83// }
84
85// #[derive(Eq, PartialEq, Debug, Ord, PartialOrd)]
86// enum Section {
87// Title, // HEADER, OBSLTE, TITLE, SPLIT, CAVEAT, COMPND, SOURCE, KEYWDS, EXPDTA, NUMMDL, MDLTYP, AUTHOR, REVDAT, SPRSDE, JRNL
88// Remark, // REMARKs 0-999
89// PrimaryStructure, // DBREF, SEQADV, SEQRES, MODRES
90// Heterogen, // HET, HETNAM, HETSYN, FORMUL
91// SecondaryStructure, // HELIX, SHEET
92// Connectivity, // CONECT, SSBOND, LINK, CISPEP
93// Misc, // SITE
94// Crystallography, // CRYST1
95// CoordinateTransformation, // ORIGXn, SCALEn, MTRIXn
96// Coordinate, // MODEL, ATOM, ANISOU, TER, HETATM, ENDMDL
97// }
98// /// Commas, colons, and semi-colons are used as list delimiters in records that have one of the following data types:
99// ///
100// /// - List
101// /// - SList
102// /// - Specification List
103// /// - Specification
104// ///
105// /// If a comma, colon, or semi-colon is used in any context other than as a delimiting character, then the character must be escaped, i.e., immediately preceded by a backslash, "\".
106// enum Dtype {
107// // To interpret a String, concatenate the contents of all continued fields together, collapse all sequences of multiple blanks to a single blank, and remove any leading and trailing blanks. This permits very long strings to be properly reconstructed.
108// List, // A String that is composed of text separated with commas.
109// SList, // A String that is composed of text separated with semi-colons.
110// Specification, // A String composed of a token and its associated value separated by a colon.
111// SpecificationList, // A sequence of Specifications, separated by semi-colons.
112// String,
113// Oneline,
114// }
115
116// use std::str::from_utf8_unchecked;
117
118// use std::fs::read;
119// use std::fs::read_to_string;
120// use std::fs::File;
121
122// pub enum Record<'a> {
123// Header(title_section::header::Header<'a>),
124// Authors(Vec<&'a [u8]>),
125// Keywords(Vec<&'a [u8]>),
126// Cryst1(crystallography::cryst1::Cryst1),
127// }
128
129// use memmap::MmapOptions;
130// pub unsafe fn apply_file_content_unsafe<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
131// where
132// F: FnOnce(&[u8]) -> T,
133// {
134// let file = File::open(fp)?;
135// let mmap = MmapOptions::new().map(&file)?;
136// let data = from_utf8_unchecked(&mmap[..]);
137// let res = parser(data);
138// Ok(res)
139// }
140
141// pub fn apply_file_content<F, T>(fp: &[u8], parser: F) -> Result<T, std::io::Error>
142// where
143// F: FnOnce(&[u8]) -> T,
144// {
145// let bytes = read(fp)?;
146// let data = unsafe { from_utf8_unchecked(&bytes) };
147// let res = parser(data);
148// Ok(res)
149// }