knuckles_parse/
lib.rs

1//! # knuckles-parse
2//!
3//! A fast and efficient PDB (Protein Data Bank) file parser written in Rust.
4//!
5//! This library provides functionality to parse PDB files into structured Rust data types,
6//! with support for parallel processing, Python bindings, and serialization.
7//!
8//! ## Features
9//!
10//! - **Fast parsing**: Optimized for performance with optional parallel processing
11//! - **Comprehensive record support**: Handles all major PDB record types
12//! - **Python bindings**: Optional Python integration via PyO3
13//! - **Serialization**: Optional JSON serialization support via Serde
14//! - **Parallel processing**: Optional multi-threaded parsing with Rayon
15//!
16//! ## Example
17//!
18//! ```rust
19//! use knuckles_parse::{pdbreader_single, records::Record};
20//!
21//! let pdb_content = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N";
22//! let records = pdbreader_single(pdb_content);
23//!
24//! match &records[0] {
25//!     Record::Atom(atom) => {
26//!         println!("Atom name: {}", atom.name);
27//!         println!("Coordinates: ({}, {}, {})", atom.x, atom.y, atom.z);
28//!     }
29//!     _ => {}
30//! }
31//! ```
32
33pub mod records;
34use records::Record;
35
36#[cfg(feature = "python")]
37use pyo3::prelude::*;
38
/// Parse the contents of a PDB file into records, parsing the lines in parallel with Rayon.
///
/// Lines shorter than six bytes, and lines for which [`Record::try_from`] returns an
/// error, are skipped. After all lines have been parsed, a sequential pass fills in the
/// serial number of every `Record::Atom` whose serial is `0`: such an atom receives the
/// serial of the closest preceding `Record::Atom` plus one (or `1` when there is no
/// preceding atom). This is needed for some PDB files that contain more than 99,999 atoms.
///
/// # Arguments
///
/// * `contents` - The complete PDB file contents as a string
///
/// # Returns
///
/// A vector of [`Record`] variants, one per successfully parsed line.
///
/// # Features
///
/// This function is only compiled when the `parallel` feature is enabled.
///
/// # Example
///
/// ```rust
/// use knuckles_parse::pdbreader_parallel;
///
/// let pdb_content = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N\n\
///                    ATOM      2  CA  ALA A   1      20.987  18.149  27.890  1.00 11.85           C";
/// let records = pdbreader_parallel(pdb_content);
/// println!("Parsed {} records", records.len());
/// ```
#[cfg(feature = "parallel")]
pub fn pdbreader_parallel(contents: &str) -> Vec<Record> {
    use rayon::prelude::*;

    // Materialise the lines first so Rayon can split the work over a slice.
    let lines: Vec<&str> = contents.lines().collect();
    let mut records: Vec<Record> = lines
        .par_iter()
        .filter(|line| line.len() >= 6)
        .filter_map(|line| Record::try_from(*line).ok())
        .collect();

    // Serial numbers depend on the preceding atom, so this fix-up has to run
    // sequentially once the parallel parse has finished. The single-threaded
    // reader does not need a second pass because it can do this while parsing.
    let mut previous_serial = 0;
    let atoms = records.iter_mut().filter_map(|record| match record {
        Record::Atom(atom) => Some(atom),
        _ => None,
    });
    for atom in atoms {
        if atom.serial == 0 {
            atom.serial = previous_serial + 1;
        }
        previous_serial = atom.serial;
    }

    records
}
100
101/// Parse PDB file contents using single-threaded processing.
102///
103/// This function processes PDB file contents line-by-line in a single thread
104/// to extract structured record data. It handles atom serial number assignment
105/// during the parsing process, making it more efficient than the parallel version
106/// for smaller files.
107///
108/// # Arguments
109///
110/// * `contents` - The complete PDB file contents as a string
111///
112/// # Returns
113///
114/// A vector of [`Record`] variants representing the parsed PDB records.
115/// Note: Currently only returns ATOM records, filtering out other record types.
116///
117/// # Example
118///
119/// ```rust
120/// use knuckles_parse::pdbreader_single;
121///
122/// let pdb_content = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N\n\
123///                    HETATM    2  O   HOH A   2      15.123  12.456  30.789  1.00 25.50           O";
124/// let records = pdbreader_single(pdb_content);
125/// println!("Parsed {} atom records", records.len());
126/// ```
127pub fn pdbreader_single(contents: &str) -> Vec<Record> {
128    let mut last = 0;
129    contents
130        .lines()
131        .filter_map(|line| {
132            if line.len() < 6 {
133                return None;
134            }
135            let record = Record::try_from(line);
136            if let Ok(Record::Atom(mut atom)) = record {
137                if atom.serial == 0 {
138                    last += 1;
139                    atom.serial = last;
140                } else {
141                    last = atom.serial;
142                }
143                Some(Record::Atom(atom))
144            } else {
145                None
146            }
147            // Record::try_from(line).ok()
148        })
149        .collect()
150}
151
// Python extension module, compiled only when the `python` feature is enabled.
// It is registered under the Python module name `knuckles_parse`; the record
// types marked with `#[pymodule_export]` below are exported from it, alongside
// the `pdbreader` and `version` functions defined at the end of the module.
152#[cfg(feature = "python")]
153#[pymodule(name = "knuckles_parse")]
154mod knuckles_parse {
    // Pull the crate-root items (the `pdbreader_*` functions and the PyO3
    // prelude imported there) into this module's scope.
155    use super::*;
    // One export per record type; each `use` carries its own `#[pymodule_export]`.
156    #[pymodule_export]
157    use crate::records::anisotropic::AnisotropicRecord;
158    #[pymodule_export]
159    use crate::records::atom::AtomRecord;
160    #[pymodule_export]
161    use crate::records::connect::ConnectRecord;
162    #[pymodule_export]
163    use crate::records::crystal::CrystalRecord;
164    #[pymodule_export]
165    use crate::records::dbref::DBRefRecord;
166    #[pymodule_export]
167    use crate::records::het::HetRecord;
168    #[pymodule_export]
169    use crate::records::hetnam::HetnamRecord;
170    #[pymodule_export]
171    use crate::records::model::ModelRecord;
172    #[pymodule_export]
173    use crate::records::modres::ModresRecord;
174    #[pymodule_export]
175    use crate::records::mtrixn::MtrixnRecord;
176    #[pymodule_export]
177    use crate::records::nummdl::NummdlRecord;
178    #[pymodule_export]
179    use crate::records::origxn::OrigxnRecord;
180    #[pymodule_export]
181    use crate::records::scalen::ScalenRecord;
182    #[pymodule_export]
183    use crate::records::seqadv::SeqAdvRecord;
184    #[pymodule_export]
185    use crate::records::seqres::SeqresRecord;
186    #[pymodule_export]
187    use crate::records::term::TermRecord;
188    #[pymodule_export]
189    use crate::records::Record;
190
    // Delegates to the parallel reader, so Python callers get every parsed
    // record type, not just atoms.
    // NOTE(review): `pdbreader_parallel` is gated on the `parallel` feature while
    // this module is gated only on `python`; building with `python` but without
    // `parallel` would leave this call unresolved. Confirm that the `python`
    // feature enables `parallel` in Cargo.toml.
191    /// Creates a list of PDB records from a string
192    #[pyfunction]
193    fn pdbreader(contents: &str) -> Vec<Record> {
194        pdbreader_parallel(contents)
195    }
196
    // Returns the crate version string; `CARGO_PKG_VERSION` is read at compile
    // time via `env!`.
197    #[pyfunction]
198    fn version() -> String {
199        env!("CARGO_PKG_VERSION").to_string()
200    }
201}