knuckles_parse/records/
atom.rs

1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3
4#[cfg(feature = "python")]
5use knuckles_macro::pydefault;
6
7#[cfg(feature = "python")]
8use pyo3::prelude::*;
9
/// Represents an ATOM or HETATM record from a PDB file.
///
/// This structure contains all the information for an atom coordinate record,
/// including position, occupancy, temperature factors, and identification data.
///
/// # PDB Format
///
/// ATOM records contain atomic coordinate data for standard amino acid and nucleic acid residues.
/// HETATM records contain atomic coordinate data for non-standard residues, water molecules,
/// and other hetero-compounds.
///
/// # Fields
///
/// - `serial`: Atom serial number (1-99999, or hexadecimal for >99999)
/// - `name`: Atom name (e.g., "CA", "N", "O")
/// - `alt_loc`: Alternative location indicator, `None` when the column is blank
/// - `res_name`: Residue name (e.g., "ALA", "GLY", "HOH")
/// - `chain_id`: Chain identifier, `None` when the column is blank
/// - `res_seq`: Residue sequence number
/// - `i_code`: Insertion code for residues, `None` when the column is blank
/// - `x`, `y`, `z`: Atomic coordinates in Ångströms
/// - `occupancy`: Occupancy value (0.0-1.0)
/// - `temp_factor`: Temperature factor (B-factor)
/// - `element`: Element symbol, `None` when absent
/// - `charge`: Formal charge, `None` when absent
/// - `entry`: PDB entry identifier, `None` when absent
///
/// # Example
///
/// ```rust
/// use knuckles_parse::records::atom::AtomRecord;
///
/// let line = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N";
/// let atom = AtomRecord::from(line);
///
/// assert_eq!(atom.serial, 1);
/// assert_eq!(atom.name, "N");
/// assert_eq!(atom.res_name, "ALA");
/// assert_eq!(atom.x, 20.154);
/// ```
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
// `module` becomes the Python class's `__module__`; it has to name the importable module
// (assumed to be `knuckles_parse`, matching the crate path used in the example above —
// confirm against the `#[pymodule]` definition) or pickling and qualified reprs break.
#[cfg_attr(
    feature = "python",
    pyclass(get_all, set_all, module = "knuckles_parse")
)]
#[cfg_attr(feature = "python", pydefault)]
pub struct AtomRecord {
    /// Atom serial number (1-99999, or hexadecimal for larger values)
    pub serial: u32,
    /// Atom name (e.g., "CA", "N", "O")
    pub name: String,
    /// Alternative location indicator
    pub alt_loc: Option<char>,
    /// Residue name (e.g., "ALA", "GLY", "HOH")
    pub res_name: String,
    /// Chain identifier
    pub chain_id: Option<char>,
    /// Residue sequence number
    pub res_seq: i16,
    /// Insertion code for residues
    pub i_code: Option<char>,
    /// X coordinate in Ångströms
    pub x: f32,
    /// Y coordinate in Ångströms
    pub y: f32,
    /// Z coordinate in Ångströms
    pub z: f32,
    /// Occupancy value (0.0-1.0)
    pub occupancy: f32,
    /// Temperature factor (B-factor)
    pub temp_factor: f32,
    /// Element symbol
    pub element: Option<String>,
    /// Formal charge
    pub charge: Option<String>,
    /// PDB entry identifier
    pub entry: Option<String>,
}
89
90impl AtomRecord {
91    /// Create a new AtomRecord by parsing a PDB ATOM or HETATM line.
92    ///
93    /// This method parses fixed-width fields according to the PDB format specification.
94    /// It handles both decimal and hexadecimal serial numbers (for atoms > 99999).
95    ///
96    /// # Arguments
97    ///
98    /// * `str` - A single ATOM or HETATM line from a PDB file
99    ///
100    /// # Returns
101    ///
102    /// A new `AtomRecord` with all fields parsed from the input line.
103    ///
104    /// # Panics
105    ///
106    /// Panics if required numeric fields cannot be parsed (coordinates, occupancy, etc.)
107    ///
108    /// # Example
109    ///
110    /// ```rust
111    /// use knuckles_parse::records::atom::AtomRecord;
112    ///
113    /// let line = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N";
114    /// let atom = AtomRecord::new(line);
115    /// assert_eq!(atom.name, "N");
116    /// ```
117    pub fn new(str: &str) -> AtomRecord {
118        let mut radix = 10;
119        let serial = str[6..11].trim();
120        if serial.chars().any(|c| c.is_ascii_alphabetic()) {
121            radix = 16;
122        }
123        AtomRecord {
124            // TODO: add support for parsing serial numbers > 99999
125            serial: u32::from_str_radix(serial, radix).unwrap_or_default(),
126            name: str[12..16].trim().to_string(),
127            alt_loc: str[16..17].trim().parse::<char>().ok(),
128            res_name: str[17..20].trim().to_string(),
129            chain_id: str[21..22].trim().parse::<char>().ok(),
130            res_seq: str[22..26].trim().parse().unwrap(),
131            i_code: str[26..27].trim().parse().ok(),
132            x: str[30..38].trim().parse().unwrap(),
133            y: str[38..46].trim().parse().unwrap(),
134            z: str[46..54].trim().parse().unwrap(),
135            occupancy: str[54..60].trim().parse().unwrap(),
136            temp_factor: str[60..66].trim().parse().unwrap(),
137            entry: str
138                .get(72..76)
139                .map(|str| str.trim().to_string())
140                .filter(|item| !item.is_empty()),
141            element: str
142                .get(77..80)
143                .map(|str| str.trim().to_string())
144                .filter(|item| !item.is_empty()),
145            charge: str
146                .get(78..80)
147                .map(|str| str.trim().to_string())
148                .filter(|item| !item.is_empty()),
149        }
150    }
151}
152
153impl From<&str> for AtomRecord {
154    fn from(str: &str) -> Self {
155        AtomRecord::new(str)
156    }
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Decimal serial, blank chain identifier, entry ID present, element and charge
    /// columns blank.
    #[test]
    fn parse_atom_line_test() {
        const LINE: &str =
            "ATOM     17  NE2 GLN     2      25.562  32.733   1.806  1.00 19.49      1UBQ    ";
        let record = AtomRecord::new(LINE);
        assert_eq!(record.serial, 17);
        assert_eq!(record.name, "NE2");
        assert_eq!(record.alt_loc, None);
        assert_eq!(record.res_name, "GLN");
        assert_eq!(record.chain_id, None);
        assert_eq!(record.res_seq, 2);
        assert_eq!(record.i_code, None);
        assert_eq!(record.x, 25.562);
        assert_eq!(record.y, 32.733);
        assert_eq!(record.z, 1.806);
        assert_eq!(record.occupancy, 1.00);
        assert_eq!(record.temp_factor, 19.49);
        assert_eq!(record.entry, Some("1UBQ".to_string()));
        assert_eq!(record.element, None);
        assert_eq!(record.charge, None);
    }

    /// Hexadecimal serial (`186a0` == 100000), chain identifier present, element present,
    /// entry ID and charge columns blank.
    #[test]
    fn parse_atom_line_hex_test() {
        const LINE: &str =
            "ATOM  186a0  CA  GLY A  67      26.731  62.085   4.078  0.00  7.83           C  ";
        let record = AtomRecord::new(LINE);
        assert_eq!(record.serial, 100000);
        assert_eq!(record.name, "CA");
        assert_eq!(record.alt_loc, None);
        assert_eq!(record.res_name, "GLY");
        assert_eq!(record.chain_id, Some('A'));
        assert_eq!(record.res_seq, 67);
        assert_eq!(record.i_code, None);
        assert_eq!(record.x, 26.731);
        assert_eq!(record.y, 62.085);
        assert_eq!(record.z, 4.078);
        assert_eq!(record.occupancy, 0.00);
        assert_eq!(record.temp_factor, 7.83);
        assert_eq!(record.entry, None);
        assert_eq!(record.element, Some("C".to_string()));
        assert_eq!(record.charge, None);
    }
}