knuckles_parse/lib.rs
1//! # knuckles-parse
2//!
3//! A fast and efficient PDB (Protein Data Bank) file parser written in Rust.
4//!
5//! This library provides functionality to parse PDB files into structured Rust data types,
6//! with support for parallel processing, Python bindings, and serialization.
7//!
8//! ## Features
9//!
10//! - **Fast parsing**: Optimized for performance with optional parallel processing
11//! - **Comprehensive record support**: Handles all major PDB record types
12//! - **Python bindings**: Optional Python integration via PyO3
13//! - **Serialization**: Optional JSON serialization support via Serde
14//! - **Parallel processing**: Optional multi-threaded parsing with Rayon
15//!
16//! ## Example
17//!
18//! ```rust
19//! use knuckles_parse::{pdbreader_single, Record};
20//!
//! let pdb_content = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N";
22//! let records = pdbreader_single(pdb_content);
23//!
24//! match &records[0] {
25//! Record::Atom(atom) => {
26//! println!("Atom name: {}", atom.name);
27//! println!("Coordinates: ({}, {}, {})", atom.x, atom.y, atom.z);
28//! }
29//! _ => {}
30//! }
31//! ```
32
33pub mod records;
34use records::Record;
35
36#[cfg(feature = "python")]
37use pyo3::prelude::*;
38
/// Parse the contents of a PDB file, converting lines to records in parallel.
///
/// Each line of `contents` that is at least six bytes long is converted with
/// [`Record::try_from`] through a Rayon parallel iterator. Shorter lines, and
/// lines whose conversion fails, are dropped without reporting an error.
///
/// Once every line has been converted, a sequential pass fills in missing
/// atom serial numbers: an atom whose serial is `0` is given the serial of the
/// previous atom plus one. This is needed for PDB files holding more than
/// 99,999 atoms.
///
/// # Arguments
///
/// * `contents` - The complete PDB file contents as a string
///
/// # Returns
///
/// A vector with one [`Record`] per successfully converted line.
///
/// # Features
///
/// Only compiled when the `parallel` feature is enabled.
///
/// # Example
///
/// ```rust
/// use knuckles_parse::pdbreader_parallel;
///
/// let pdb_content = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N\n\
///                    ATOM      2  CA  ALA A   1      20.987  18.149  27.890  1.00 11.85           C";
/// let records = pdbreader_parallel(pdb_content);
/// println!("Parsed {} records", records.len());
/// ```
#[cfg(feature = "parallel")]
pub fn pdbreader_parallel(contents: &str) -> Vec<Record> {
    use rayon::prelude::*;

    // Materialise the lines first so Rayon can iterate over a slice.
    let lines: Vec<&str> = contents.lines().collect();
    let mut parsed: Vec<Record> = lines
        .par_iter()
        .filter(|line| line.len() >= 6)
        .filter_map(|line| Record::try_from(*line).ok())
        .collect();

    // Serial numbers depend on the preceding atom, so they cannot be filled in
    // while lines are converted independently; a second, sequential pass over
    // the collected records does it instead. (The single-threaded reader folds
    // this into its only pass.)
    let mut previous_serial = 0;
    for entry in parsed.iter_mut() {
        if let Record::Atom(atom) = entry {
            if atom.serial == 0 {
                previous_serial += 1;
            } else {
                previous_serial = atom.serial;
            }
            atom.serial = previous_serial;
        }
    }

    parsed
}
100
101/// Parse PDB file contents using single-threaded processing.
102///
103/// This function processes PDB file contents line-by-line in a single thread
104/// to extract structured record data. It handles atom serial number assignment
105/// during the parsing process, making it more efficient than the parallel version
106/// for smaller files.
107///
108/// # Arguments
109///
110/// * `contents` - The complete PDB file contents as a string
111///
112/// # Returns
113///
114/// A vector of [`Record`] variants representing the parsed PDB records.
115/// Note: Currently only returns ATOM records, filtering out other record types.
116///
117/// # Example
118///
119/// ```rust
120/// use knuckles_parse::pdbreader_single;
121///
122/// let pdb_content = "ATOM 1 N ALA A 1 20.154 16.967 27.462 1.00 11.18 N\n\
123/// HETATM 2 O HOH A 2 15.123 12.456 30.789 1.00 25.50 O";
124/// let records = pdbreader_single(pdb_content);
125/// println!("Parsed {} atom records", records.len());
126/// ```
127pub fn pdbreader_single(contents: &str) -> Vec<Record> {
128 let mut last = 0;
129 contents
130 .lines()
131 .filter_map(|line| {
132 if line.len() < 6 {
133 return None;
134 }
135 let record = Record::try_from(line);
136 if let Ok(Record::Atom(mut atom)) = record {
137 if atom.serial == 0 {
138 last += 1;
139 atom.serial = last;
140 } else {
141 last = atom.serial;
142 }
143 Some(Record::Atom(atom))
144 } else {
145 None
146 }
147 // Record::try_from(line).ok()
148 })
149 .collect()
150}
151
// Python extension module, compiled only when the `python` feature is enabled.
// It is declared with PyO3's `#[pymodule]` attribute and registered under the
// name `knuckles_parse`.
#[cfg(feature = "python")]
#[pymodule(name = "knuckles_parse")]
mod knuckles_parse {
    // Pulls in the crate-root items (the reader functions and the PyO3 prelude)
    // used by the functions below.
    use super::*;
    // Each `#[pymodule_export]` import below adds the named record type to the
    // Python module.
    #[pymodule_export]
    use crate::records::anisotropic::AnisotropicRecord;
    #[pymodule_export]
    use crate::records::atom::AtomRecord;
    #[pymodule_export]
    use crate::records::connect::ConnectRecord;
    #[pymodule_export]
    use crate::records::crystal::CrystalRecord;
    #[pymodule_export]
    use crate::records::dbref::DBRefRecord;
    #[pymodule_export]
    use crate::records::het::HetRecord;
    #[pymodule_export]
    use crate::records::hetnam::HetnamRecord;
    #[pymodule_export]
    use crate::records::model::ModelRecord;
    #[pymodule_export]
    use crate::records::modres::ModresRecord;
    #[pymodule_export]
    use crate::records::mtrixn::MtrixnRecord;
    #[pymodule_export]
    use crate::records::nummdl::NummdlRecord;
    #[pymodule_export]
    use crate::records::origxn::OrigxnRecord;
    #[pymodule_export]
    use crate::records::scalen::ScalenRecord;
    #[pymodule_export]
    use crate::records::seqadv::SeqAdvRecord;
    #[pymodule_export]
    use crate::records::seqres::SeqresRecord;
    #[pymodule_export]
    use crate::records::term::TermRecord;
    #[pymodule_export]
    use crate::records::Record;

    // Thin wrapper that forwards to `pdbreader_parallel`.
    // NOTE(review): `pdbreader_parallel` is gated on the `parallel` feature, so
    // this presumably relies on `python` enabling `parallel` — confirm in
    // Cargo.toml.
    /// Creates a list of PDB records from a string
    #[pyfunction]
    fn pdbreader(contents: &str) -> Vec<Record> {
        pdbreader_parallel(contents)
    }

    // Returns the crate version string that Cargo supplies at compile time
    // through the `CARGO_PKG_VERSION` environment variable.
    #[pyfunction]
    fn version() -> String {
        env!("CARGO_PKG_VERSION").to_string()
    }
}