knuckles_parse/records/atom.rs
1#[cfg(feature = "serde")]
2use serde::{Deserialize, Serialize};
3
4#[cfg(feature = "python")]
5use knuckles_macro::pydefault;
6
7#[cfg(feature = "python")]
8use pyo3::prelude::*;
9
/// Represents an ATOM or HETATM record from a PDB file.
///
/// This structure contains all the information for an atom coordinate record,
/// including position, occupancy, temperature factors, and identification data.
///
/// # PDB Format
///
/// ATOM records contain atomic coordinate data for standard amino acid and nucleic acid residues.
/// HETATM records contain atomic coordinate data for non-standard residues, water molecules,
/// and other hetero-compounds.
///
/// Records are fixed-width: every field lives at a fixed column range, so the
/// whitespace inside a line is significant and must not be collapsed.
///
/// # Fields
///
/// - `serial`: Atom serial number (1-99999, or hexadecimal for >99999)
/// - `name`: Atom name (e.g., "CA", "N", "O")
/// - `alt_loc`: Alternative location indicator
/// - `res_name`: Residue name (e.g., "ALA", "GLY", "HOH")
/// - `chain_id`: Chain identifier
/// - `res_seq`: Residue sequence number
/// - `i_code`: Insertion code for residues
/// - `x`, `y`, `z`: Atomic coordinates in Ångströms
/// - `occupancy`: Occupancy value (0.0-1.0)
/// - `temp_factor`: Temperature factor (B-factor)
/// - `element`: Element symbol
/// - `charge`: Formal charge
/// - `entry`: PDB entry identifier
///
/// # Example
///
/// ```rust
/// use knuckles_parse::records::atom::AtomRecord;
///
/// // Column-aligned ATOM line; the spacing between fields is part of the format.
/// let line = "ATOM      1  N   ALA A   1      20.154  16.967  27.462  1.00 11.18           N";
/// let atom = AtomRecord::from(line);
///
/// assert_eq!(atom.serial, 1);
/// assert_eq!(atom.name, "N");
/// assert_eq!(atom.res_name, "ALA");
/// assert_eq!(atom.x, 20.154);
/// ```
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(
    feature = "python",
    pyclass(get_all, set_all, module = "knuckles_parse")
)]
#[cfg_attr(feature = "python", pydefault)]
pub struct AtomRecord {
    /// Atom serial number (1-99999, or hexadecimal for larger values)
    pub serial: u32,
    /// Atom name (e.g., "CA", "N", "O")
    pub name: String,
    /// Alternative location indicator
    pub alt_loc: Option<char>,
    /// Residue name (e.g., "ALA", "GLY", "HOH")
    pub res_name: String,
    /// Chain identifier
    pub chain_id: Option<char>,
    /// Residue sequence number
    pub res_seq: i16,
    /// Insertion code for residues
    pub i_code: Option<char>,
    /// X coordinate in Ångströms
    pub x: f32,
    /// Y coordinate in Ångströms
    pub y: f32,
    /// Z coordinate in Ångströms
    pub z: f32,
    /// Occupancy value (0.0-1.0)
    pub occupancy: f32,
    /// Temperature factor (B-factor)
    pub temp_factor: f32,
    /// Element symbol
    pub element: Option<String>,
    /// Formal charge
    pub charge: Option<String>,
    /// PDB entry identifier
    pub entry: Option<String>,
}
89
90impl AtomRecord {
91 /// Create a new AtomRecord by parsing a PDB ATOM or HETATM line.
92 ///
93 /// This method parses fixed-width fields according to the PDB format specification.
94 /// It handles both decimal and hexadecimal serial numbers (for atoms > 99999).
95 ///
96 /// # Arguments
97 ///
98 /// * `str` - A single ATOM or HETATM line from a PDB file
99 ///
100 /// # Returns
101 ///
102 /// A new `AtomRecord` with all fields parsed from the input line.
103 ///
104 /// # Panics
105 ///
106 /// Panics if required numeric fields cannot be parsed (coordinates, occupancy, etc.)
107 ///
108 /// # Example
109 ///
110 /// ```rust
111 /// use knuckles_parse::records::atom::AtomRecord;
112 ///
113 /// let line = "ATOM 1 N ALA A 1 20.154 16.967 27.462 1.00 11.18 N";
114 /// let atom = AtomRecord::new(line);
115 /// assert_eq!(atom.name, "N");
116 /// ```
117 pub fn new(str: &str) -> AtomRecord {
118 let mut radix = 10;
119 let serial = str[6..11].trim();
120 if serial.chars().any(|c| c.is_ascii_alphabetic()) {
121 radix = 16;
122 }
123 AtomRecord {
124 // TODO: add support for parsing serial numbers > 99999
125 serial: u32::from_str_radix(serial, radix).unwrap_or_default(),
126 name: str[12..16].trim().to_string(),
127 alt_loc: str[16..17].trim().parse::<char>().ok(),
128 res_name: str[17..20].trim().to_string(),
129 chain_id: str[21..22].trim().parse::<char>().ok(),
130 res_seq: str[22..26].trim().parse().unwrap(),
131 i_code: str[26..27].trim().parse().ok(),
132 x: str[30..38].trim().parse().unwrap(),
133 y: str[38..46].trim().parse().unwrap(),
134 z: str[46..54].trim().parse().unwrap(),
135 occupancy: str[54..60].trim().parse().unwrap(),
136 temp_factor: str[60..66].trim().parse().unwrap(),
137 entry: str
138 .get(72..76)
139 .map(|str| str.trim().to_string())
140 .filter(|item| !item.is_empty()),
141 element: str
142 .get(77..80)
143 .map(|str| str.trim().to_string())
144 .filter(|item| !item.is_empty()),
145 charge: str
146 .get(78..80)
147 .map(|str| str.trim().to_string())
148 .filter(|item| !item.is_empty()),
149 }
150 }
151}
152
153impl From<&str> for AtomRecord {
154 fn from(str: &str) -> Self {
155 AtomRecord::new(str)
156 }
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Legacy-style record: decimal serial, blank chain, and the entry identifier
    /// in columns 73-76 with no element or charge.
    #[test]
    fn parse_atom_line_test() {
        // The fixture is an exact 80-column record; the parser slices by fixed
        // offsets, so the spacing between fields must be preserved verbatim.
        const LINE: &str =
            "ATOM     17  NE2 GLN     2      25.562  32.733   1.806  1.00 19.49      1UBQ    ";
        let record = AtomRecord::new(LINE);
        assert_eq!(record.serial, 17);
        assert_eq!(record.name, "NE2");
        assert_eq!(record.alt_loc, None);
        assert_eq!(record.res_name, "GLN");
        assert_eq!(record.res_seq, 2);
        assert_eq!(record.x, 25.562);
        assert_eq!(record.y, 32.733);
        assert_eq!(record.z, 1.806);
        assert_eq!(record.occupancy, 1.00);
        assert_eq!(record.temp_factor, 19.49);
        assert_eq!(record.entry, Some("1UBQ".to_string()));
        assert_eq!(record.element, None);
        assert_eq!(record.charge, None);
    }

    /// Record whose serial exceeds 99999 and is therefore written in hexadecimal
    /// (`186a0` == 100000), with a right-justified element symbol and no charge.
    #[test]
    fn parse_atom_line_hex_test() {
        // Exact 80-column record: element "C" sits in column 78, charge columns are blank.
        const LINE: &str =
            "ATOM  186a0  CA  GLY A  67      26.731  62.085   4.078  0.00  7.83           C  ";
        let record = AtomRecord::new(LINE);
        assert_eq!(record.serial, 100000);
        assert_eq!(record.name, "CA");
        assert_eq!(record.alt_loc, None);
        assert_eq!(record.res_name, "GLY");
        assert_eq!(record.res_seq, 67);
        assert_eq!(record.x, 26.731);
        assert_eq!(record.y, 62.085);
        assert_eq!(record.z, 4.078);
        assert_eq!(record.occupancy, 0.00);
        assert_eq!(record.temp_factor, 7.83);
        assert_eq!(record.entry, None);
        assert_eq!(record.element, Some("C".to_string()));
        assert_eq!(record.charge, None);
    }
}