Skip to main content

oxiphysics_io/
pdb.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! PDB (Protein Data Bank) format writer and reader.
5//!
6//! Supports ATOM, HETATM, CONECT, CRYST1, multi-MODEL PDB files,
7//! occupancy/B-factor handling, and chain ID operations.
8
9use crate::{Error, Result};
10use oxiphysics_core::math::Vec3;
11use std::fs::File;
12use std::io::{BufRead, BufReader, BufWriter, Write};
13use std::path::Path;
14
15/// Represents a single atom record in PDB format.
16#[derive(Debug, Clone)]
17pub struct PdbAtom {
18    /// Atom serial number.
19    pub serial: u32,
20    /// Atom name (e.g., "CA", "N").
21    pub name: String,
22    /// Residue name (e.g., "ALA", "GLY").
23    pub res_name: String,
24    /// Chain identifier (single character, e.g., 'A').
25    pub chain_id: char,
26    /// Residue sequence number.
27    pub res_seq: u32,
28    /// X coordinate in angstroms.
29    pub x: f64,
30    /// Y coordinate in angstroms.
31    pub y: f64,
32    /// Z coordinate in angstroms.
33    pub z: f64,
34    /// Occupancy factor.
35    pub occupancy: f64,
36    /// Temperature (B) factor.
37    pub temp_factor: f64,
38    /// Element symbol.
39    pub element: String,
40    /// Whether this is a HETATM record (vs ATOM).
41    pub is_hetatm: bool,
42}
43
44/// All named fields for constructing a [`PdbAtom`] via [`PdbAtom::new`].
45///
46/// Groups the 11 record fields so the constructor stays within the
47/// argument-count limit.
48#[derive(Debug, Clone)]
49pub struct PdbAtomData {
50    /// Atom serial number.
51    pub serial: u32,
52    /// Atom name (e.g., `"CA"`, `"N"`).
53    pub name: String,
54    /// Residue name (e.g., `"ALA"`, `"GLY"`).
55    pub res_name: String,
56    /// Chain identifier.
57    pub chain_id: char,
58    /// Residue sequence number.
59    pub res_seq: u32,
60    /// X coordinate \[Å\]
61    pub x: f64,
62    /// Y coordinate \[Å\]
63    pub y: f64,
64    /// Z coordinate \[Å\]
65    pub z: f64,
66    /// Occupancy factor.
67    pub occupancy: f64,
68    /// Temperature (B) factor.
69    pub temp_factor: f64,
70    /// Element symbol.
71    pub element: String,
72}
73
74impl PdbAtom {
75    /// Create a `PdbAtom` from a [`PdbAtomData`] record.
76    pub fn new(data: PdbAtomData) -> Self {
77        PdbAtom {
78            serial: data.serial,
79            name: data.name,
80            res_name: data.res_name,
81            chain_id: data.chain_id,
82            res_seq: data.res_seq,
83            x: data.x,
84            y: data.y,
85            z: data.z,
86            occupancy: data.occupancy,
87            temp_factor: data.temp_factor,
88            element: data.element,
89            is_hetatm: false,
90        }
91    }
92
93    /// Return position as a `Vec3`.
94    pub fn position(&self) -> Vec3 {
95        Vec3::new(self.x, self.y, self.z)
96    }
97
98    /// Compute distance to another atom.
99    pub fn distance_to(&self, other: &PdbAtom) -> f64 {
100        let dx = self.x - other.x;
101        let dy = self.y - other.y;
102        let dz = self.z - other.z;
103        (dx * dx + dy * dy + dz * dz).sqrt()
104    }
105}
106
107/// A CONECT record: bonds between atoms.
108#[derive(Debug, Clone)]
109pub struct ConectRecord {
110    /// Serial number of the atom.
111    pub atom_serial: u32,
112    /// Serial numbers of bonded atoms.
113    pub bonded_atoms: Vec<u32>,
114}
115
116/// A CRYST1 record: unit cell parameters.
117#[derive(Debug, Clone)]
118pub struct Cryst1Record {
119    /// Unit cell lengths a, b, c in angstroms.
120    pub a: f64,
121    /// Unit cell length b.
122    pub b: f64,
123    /// Unit cell length c.
124    pub c: f64,
125    /// Unit cell angle alpha in degrees.
126    pub alpha: f64,
127    /// Unit cell angle beta in degrees.
128    pub beta: f64,
129    /// Unit cell angle gamma in degrees.
130    pub gamma: f64,
131    /// Space group.
132    pub space_group: String,
133    /// Z value (number of molecules per unit cell).
134    pub z_value: u32,
135}
136
137impl Default for Cryst1Record {
138    fn default() -> Self {
139        Cryst1Record {
140            a: 1.0,
141            b: 1.0,
142            c: 1.0,
143            alpha: 90.0,
144            beta: 90.0,
145            gamma: 90.0,
146            space_group: "P 1".to_string(),
147            z_value: 1,
148        }
149    }
150}
151
152/// A PDB model: a collection of atoms with optional remarks, title, and metadata.
153#[derive(Debug, Clone)]
154pub struct PdbModel {
155    /// All ATOM/HETATM records.
156    pub atoms: Vec<PdbAtom>,
157    /// REMARK lines (text after the "REMARK" keyword).
158    pub remarks: Vec<String>,
159    /// TITLE line content.
160    pub title: String,
161    /// CONECT records.
162    pub conect_records: Vec<ConectRecord>,
163    /// CRYST1 record (unit cell).
164    pub cryst1: Option<Cryst1Record>,
165    /// Model number (for multi-model PDB files).
166    pub model_number: Option<u32>,
167}
168
169/// A multi-model PDB file.
170#[derive(Debug, Clone)]
171pub struct PdbFile {
172    /// All models in the PDB file.
173    pub models: Vec<PdbModel>,
174    /// File-level remarks.
175    pub remarks: Vec<String>,
176    /// File-level title.
177    pub title: String,
178    /// File-level CRYST1 record.
179    pub cryst1: Option<Cryst1Record>,
180}
181
182/// Parse a single ATOM or HETATM line into a [`PdbAtom`].
183///
184/// Returns `None` if the line is not an ATOM/HETATM record or is too short to parse.
185pub fn parse_atom_line(line: &str) -> Option<PdbAtom> {
186    let record = &line[..line.len().min(6)];
187    let is_hetatm = record == "HETATM";
188    if record != "ATOM  " && !is_hetatm {
189        return None;
190    }
191    if line.len() < 54 {
192        return None;
193    }
194
195    let serial = line.get(6..11)?.trim().parse::<u32>().ok()?;
196    let name = line.get(12..16)?.trim().to_string();
197    let res_name = line.get(17..20)?.trim().to_string();
198    let chain_id = line.as_bytes().get(21).map_or('A', |&b| b as char);
199    let res_seq = line
200        .get(22..26)
201        .and_then(|s| s.trim().parse::<u32>().ok())
202        .unwrap_or(1);
203    let x = line.get(30..38)?.trim().parse::<f64>().ok()?;
204    let y = line.get(38..46)?.trim().parse::<f64>().ok()?;
205    let z = line.get(46..54)?.trim().parse::<f64>().ok()?;
206    let occupancy = line
207        .get(54..60)
208        .and_then(|s| s.trim().parse::<f64>().ok())
209        .unwrap_or(1.0);
210    let temp_factor = line
211        .get(60..66)
212        .and_then(|s| s.trim().parse::<f64>().ok())
213        .unwrap_or(0.0);
214    let element = line
215        .get(76..78)
216        .map(|s| s.trim().to_string())
217        .unwrap_or_default();
218
219    let mut atom = PdbAtom::new(PdbAtomData {
220        serial,
221        name,
222        res_name,
223        chain_id,
224        res_seq,
225        x,
226        y,
227        z,
228        occupancy,
229        temp_factor,
230        element,
231    });
232    atom.is_hetatm = is_hetatm;
233    Some(atom)
234}
235
236/// Parse a CONECT record line.
237pub fn parse_conect_line(line: &str) -> Option<ConectRecord> {
238    if !line.starts_with("CONECT") {
239        return None;
240    }
241    let parts: Vec<u32> = line[6..]
242        .split_whitespace()
243        .filter_map(|s| s.parse::<u32>().ok())
244        .collect();
245    if parts.is_empty() {
246        return None;
247    }
248    Some(ConectRecord {
249        atom_serial: parts[0],
250        bonded_atoms: parts[1..].to_vec(),
251    })
252}
253
254/// Parse a CRYST1 record line.
255pub fn parse_cryst1_line(line: &str) -> Option<Cryst1Record> {
256    if !line.starts_with("CRYST1") {
257        return None;
258    }
259    if line.len() < 54 {
260        return None;
261    }
262    let a = line.get(6..15)?.trim().parse::<f64>().ok()?;
263    let b = line.get(15..24)?.trim().parse::<f64>().ok()?;
264    let c = line.get(24..33)?.trim().parse::<f64>().ok()?;
265    let alpha = line.get(33..40)?.trim().parse::<f64>().ok()?;
266    let beta = line.get(40..47)?.trim().parse::<f64>().ok()?;
267    let gamma = line.get(47..54)?.trim().parse::<f64>().ok()?;
268    let space_group = line.get(55..66).unwrap_or("P 1").trim().to_string();
269    let z_value = line
270        .get(66..70)
271        .and_then(|s| s.trim().parse::<u32>().ok())
272        .unwrap_or(1);
273
274    Some(Cryst1Record {
275        a,
276        b,
277        c,
278        alpha,
279        beta,
280        gamma,
281        space_group,
282        z_value,
283    })
284}
285
286/// Parse PDB file content (as a `&str`) into a [`PdbModel`].
287///
288/// Handles TITLE, REMARK, ATOM, HETATM, CONECT, and CRYST1 records.
289pub fn parse_pdb(content: &str) -> Result<PdbModel> {
290    let mut atoms = Vec::new();
291    let mut remarks = Vec::new();
292    let mut title = String::new();
293    let mut conect_records = Vec::new();
294    let mut cryst1 = None;
295
296    for line in content.lines() {
297        if line.starts_with("TITLE") {
298            title = line.get(10..).unwrap_or("").trim().to_string();
299        } else if line.starts_with("REMARK") {
300            remarks.push(line.get(6..).unwrap_or("").trim().to_string());
301        } else if line.starts_with("ATOM  ") || line.starts_with("HETATM") {
302            match parse_atom_line(line) {
303                Some(atom) => atoms.push(atom),
304                None => return Err(Error::Parse(format!("Failed to parse atom line: {line}"))),
305            }
306        } else if line.starts_with("CONECT") {
307            if let Some(conect) = parse_conect_line(line) {
308                conect_records.push(conect);
309            }
310        } else if line.starts_with("CRYST1") {
311            cryst1 = parse_cryst1_line(line);
312        }
313    }
314
315    Ok(PdbModel {
316        atoms,
317        remarks,
318        title,
319        conect_records,
320        cryst1,
321        model_number: None,
322    })
323}
324
325/// Parse a multi-model PDB file.
326pub fn parse_multi_model_pdb(content: &str) -> Result<PdbFile> {
327    let mut models = Vec::new();
328    let mut file_remarks = Vec::new();
329    let mut file_title = String::new();
330    let mut file_cryst1 = None;
331
332    let mut current_atoms: Vec<PdbAtom> = Vec::new();
333    let mut current_model_num: Option<u32> = None;
334    let mut current_conect: Vec<ConectRecord> = Vec::new();
335    let mut in_model = false;
336
337    for line in content.lines() {
338        if line.starts_with("TITLE") {
339            file_title = line.get(10..).unwrap_or("").trim().to_string();
340        } else if line.starts_with("REMARK") {
341            file_remarks.push(line.get(6..).unwrap_or("").trim().to_string());
342        } else if line.starts_with("CRYST1") {
343            file_cryst1 = parse_cryst1_line(line);
344        } else if let Some(rest) = line.strip_prefix("MODEL") {
345            in_model = true;
346            current_model_num = rest.trim().parse::<u32>().ok();
347            current_atoms.clear();
348            current_conect.clear();
349        } else if line.starts_with("ENDMDL") {
350            models.push(PdbModel {
351                atoms: std::mem::take(&mut current_atoms),
352                remarks: Vec::new(),
353                title: String::new(),
354                conect_records: std::mem::take(&mut current_conect),
355                cryst1: file_cryst1.clone(),
356                model_number: current_model_num,
357            });
358            in_model = false;
359        } else if line.starts_with("ATOM  ") || line.starts_with("HETATM") {
360            if let Some(atom) = parse_atom_line(line) {
361                current_atoms.push(atom);
362            }
363        } else if line.starts_with("CONECT")
364            && let Some(conect) = parse_conect_line(line)
365        {
366            current_conect.push(conect);
367        }
368    }
369
370    // If no MODEL/ENDMDL was found, treat whole file as one model
371    if models.is_empty() && !current_atoms.is_empty() {
372        models.push(PdbModel {
373            atoms: current_atoms,
374            remarks: file_remarks.clone(),
375            title: file_title.clone(),
376            conect_records: current_conect,
377            cryst1: file_cryst1.clone(),
378            model_number: None,
379        });
380    } else if !in_model && !current_atoms.is_empty() {
381        // Atoms found after last ENDMDL
382        models.push(PdbModel {
383            atoms: current_atoms,
384            remarks: Vec::new(),
385            title: String::new(),
386            conect_records: current_conect,
387            cryst1: file_cryst1.clone(),
388            model_number: None,
389        });
390    }
391
392    Ok(PdbFile {
393        models,
394        remarks: file_remarks,
395        title: file_title,
396        cryst1: file_cryst1,
397    })
398}
399
400/// Write a [`PdbModel`] to a PDB fixed-column format string.
401pub fn write_pdb(model: &PdbModel) -> String {
402    let mut out = String::new();
403
404    if !model.title.is_empty() {
405        out.push_str(&format!("TITLE     {}\n", model.title));
406    }
407    for remark in &model.remarks {
408        out.push_str(&format!("REMARK {remark}\n"));
409    }
410
411    if let Some(ref cryst) = model.cryst1 {
412        out.push_str(&format!(
413            "CRYST1{:9.3}{:9.3}{:9.3}{:7.2}{:7.2}{:7.2} {:<11}{:>4}\n",
414            cryst.a,
415            cryst.b,
416            cryst.c,
417            cryst.alpha,
418            cryst.beta,
419            cryst.gamma,
420            cryst.space_group,
421            cryst.z_value,
422        ));
423    }
424
425    for atom in &model.atoms {
426        let record_type = if atom.is_hetatm { "HETATM" } else { "ATOM  " };
427        let atom_name = if atom.name.len() < 4 {
428            format!(" {:<3}", atom.name)
429        } else {
430            format!("{:<4}", atom.name)
431        };
432        let element_field = if atom.element.is_empty() {
433            "  ".to_string()
434        } else {
435            format!("{:>2}", atom.element)
436        };
437        out.push_str(&format!(
438            "{}{:>5} {} {:>3} {}{:>4}    {:8.3}{:8.3}{:8.3}{:6.2}{:6.2}          {}\n",
439            record_type,
440            atom.serial,
441            atom_name,
442            atom.res_name,
443            atom.chain_id,
444            atom.res_seq,
445            atom.x,
446            atom.y,
447            atom.z,
448            atom.occupancy,
449            atom.temp_factor,
450            element_field,
451        ));
452    }
453
454    for conect in &model.conect_records {
455        let bonded: Vec<String> = conect
456            .bonded_atoms
457            .iter()
458            .map(|a| format!("{:>5}", a))
459            .collect();
460        out.push_str(&format!(
461            "CONECT{:>5}{}\n",
462            conect.atom_serial,
463            bonded.join("")
464        ));
465    }
466
467    out.push_str("END\n");
468    out
469}
470
471/// Write a multi-model PDB file.
472pub fn write_multi_model_pdb(pdb_file: &PdbFile) -> String {
473    let mut out = String::new();
474
475    if !pdb_file.title.is_empty() {
476        out.push_str(&format!("TITLE     {}\n", pdb_file.title));
477    }
478    for remark in &pdb_file.remarks {
479        out.push_str(&format!("REMARK {remark}\n"));
480    }
481    if let Some(ref cryst) = pdb_file.cryst1 {
482        out.push_str(&format!(
483            "CRYST1{:9.3}{:9.3}{:9.3}{:7.2}{:7.2}{:7.2} {:<11}{:>4}\n",
484            cryst.a,
485            cryst.b,
486            cryst.c,
487            cryst.alpha,
488            cryst.beta,
489            cryst.gamma,
490            cryst.space_group,
491            cryst.z_value,
492        ));
493    }
494
495    for (i, model) in pdb_file.models.iter().enumerate() {
496        let model_num = model.model_number.unwrap_or((i + 1) as u32);
497        out.push_str(&format!("MODEL     {:>4}\n", model_num));
498
499        for atom in &model.atoms {
500            let record_type = if atom.is_hetatm { "HETATM" } else { "ATOM  " };
501            let atom_name = if atom.name.len() < 4 {
502                format!(" {:<3}", atom.name)
503            } else {
504                format!("{:<4}", atom.name)
505            };
506            let element_field = if atom.element.is_empty() {
507                "  ".to_string()
508            } else {
509                format!("{:>2}", atom.element)
510            };
511            out.push_str(&format!(
512                "{}{:>5} {} {:>3} {}{:>4}    {:8.3}{:8.3}{:8.3}{:6.2}{:6.2}          {}\n",
513                record_type,
514                atom.serial,
515                atom_name,
516                atom.res_name,
517                atom.chain_id,
518                atom.res_seq,
519                atom.x,
520                atom.y,
521                atom.z,
522                atom.occupancy,
523                atom.temp_factor,
524                element_field,
525            ));
526        }
527        for conect in &model.conect_records {
528            let bonded: Vec<String> = conect
529                .bonded_atoms
530                .iter()
531                .map(|a| format!("{:>5}", a))
532                .collect();
533            out.push_str(&format!(
534                "CONECT{:>5}{}\n",
535                conect.atom_serial,
536                bonded.join("")
537            ));
538        }
539        out.push_str("ENDMDL\n");
540    }
541    out.push_str("END\n");
542    out
543}
544
545/// Filter atoms by chain ID.
546pub fn filter_by_chain(atoms: &[PdbAtom], chain: char) -> Vec<PdbAtom> {
547    atoms
548        .iter()
549        .filter(|a| a.chain_id == chain)
550        .cloned()
551        .collect()
552}
553
554/// Get all unique chain IDs from a list of atoms.
555pub fn unique_chain_ids(atoms: &[PdbAtom]) -> Vec<char> {
556    let mut chains: Vec<char> = atoms.iter().map(|a| a.chain_id).collect();
557    chains.sort_unstable();
558    chains.dedup();
559    chains
560}
561
562/// Get all unique residue names from a list of atoms.
563pub fn unique_residue_names(atoms: &[PdbAtom]) -> Vec<String> {
564    let mut names: Vec<String> = atoms.iter().map(|a| a.res_name.clone()).collect();
565    names.sort();
566    names.dedup();
567    names
568}
569
570/// Compute the center of mass of atoms (equal mass).
571pub fn center_of_mass(atoms: &[PdbAtom]) -> [f64; 3] {
572    if atoms.is_empty() {
573        return [0.0; 3];
574    }
575    let n = atoms.len() as f64;
576    let mut com = [0.0; 3];
577    for a in atoms {
578        com[0] += a.x;
579        com[1] += a.y;
580        com[2] += a.z;
581    }
582    [com[0] / n, com[1] / n, com[2] / n]
583}
584
585// ---------------------------------------------------------------------------
586// Legacy file-based writer / reader (kept for backward compatibility)
587// ---------------------------------------------------------------------------
588
589/// Writer for PDB format files.
590pub struct PdbWriter;
591
592impl PdbWriter {
593    /// Write a single frame of atom data to a PDB file.
594    ///
595    /// Each atom is written as an ATOM record following PDB column conventions.
596    pub fn write_frame(path: &str, atoms: &[PdbAtom]) -> Result<()> {
597        let model = PdbModel {
598            atoms: atoms.to_vec(),
599            remarks: Vec::new(),
600            title: String::new(),
601            conect_records: Vec::new(),
602            cryst1: None,
603            model_number: None,
604        };
605        let content = write_pdb(&model);
606        let file = File::create(Path::new(path))?;
607        let mut w = BufWriter::new(file);
608        w.write_all(content.as_bytes())?;
609        w.flush()?;
610        Ok(())
611    }
612}
613
614/// Reader for PDB format files (basic ATOM records only).
615pub struct PdbReader;
616
617impl PdbReader {
618    /// Read ATOM records from a PDB file.
619    pub fn read(path: &str) -> Result<Vec<PdbAtom>> {
620        let file = File::open(Path::new(path))?;
621        let reader = BufReader::new(file);
622        let mut content = String::new();
623        for line in reader.lines() {
624            content.push_str(&line?);
625            content.push('\n');
626        }
627        let model = parse_pdb(&content)?;
628        Ok(model.atoms)
629    }
630}
631
632// ---------------------------------------------------------------------------
633// SEQRES Parsing
634// ---------------------------------------------------------------------------
635
636/// A SEQRES record describing the primary sequence of a chain.
637#[derive(Debug, Clone)]
638pub struct SeqresRecord {
639    /// Chain identifier.
640    pub chain_id: char,
641    /// Total number of residues in the chain.
642    pub num_residues: u32,
643    /// List of 3-letter residue names.
644    pub residues: Vec<String>,
645}
646
647impl SeqresRecord {
648    /// Return the sequence as a dash-separated string of 3-letter codes.
649    pub fn sequence_string(&self) -> String {
650        self.residues.join("-")
651    }
652
653    /// Return a 1-letter code sequence (standard amino acids only; unknown → '?').
654    pub fn one_letter_sequence(&self) -> String {
655        self.residues.iter().map(|r| aa3_to_1(r)).collect()
656    }
657}
658
659/// Parse all SEQRES records from PDB content.
660pub fn parse_seqres(content: &str) -> Vec<SeqresRecord> {
661    // Group SEQRES lines by chain ID
662    let mut chains: std::collections::HashMap<char, (u32, Vec<String>)> =
663        std::collections::HashMap::new();
664
665    for line in content.lines() {
666        if !line.starts_with("SEQRES") {
667            continue;
668        }
669        if line.len() < 19 {
670            continue;
671        }
672
673        let chain_id = line.as_bytes().get(11).map_or('A', |&b| b as char);
674        let num_res: u32 = line
675            .get(13..17)
676            .and_then(|s| s.trim().parse().ok())
677            .unwrap_or(0);
678
679        // Residue names start at col 19, each is 4 chars (padded with space)
680        let residues_part = line.get(19..).unwrap_or("");
681        let residues: Vec<String> = residues_part
682            .split_whitespace()
683            .map(|s| s.to_string())
684            .collect();
685
686        let entry = chains.entry(chain_id).or_insert((0, Vec::new()));
687        if entry.0 == 0 {
688            entry.0 = num_res;
689        }
690        entry.1.extend(residues);
691    }
692
693    let mut records: Vec<SeqresRecord> = chains
694        .into_iter()
695        .map(|(chain_id, (num_residues, residues))| SeqresRecord {
696            chain_id,
697            num_residues,
698            residues,
699        })
700        .collect();
701    records.sort_by_key(|r| r.chain_id);
702    records
703}
704
705/// Convert 3-letter amino acid code to 1-letter code.
706fn aa3_to_1(aa: &str) -> char {
707    match aa {
708        "ALA" => 'A',
709        "ARG" => 'R',
710        "ASN" => 'N',
711        "ASP" => 'D',
712        "CYS" => 'C',
713        "GLN" => 'Q',
714        "GLU" => 'E',
715        "GLY" => 'G',
716        "HIS" => 'H',
717        "ILE" => 'I',
718        "LEU" => 'L',
719        "LYS" => 'K',
720        "MET" => 'M',
721        "PHE" => 'F',
722        "PRO" => 'P',
723        "SER" => 'S',
724        "THR" => 'T',
725        "TRP" => 'W',
726        "TYR" => 'Y',
727        "VAL" => 'V',
728        _ => '?',
729    }
730}
731
732// ---------------------------------------------------------------------------
733// Secondary Structure Records
734// ---------------------------------------------------------------------------
735
736/// Type of secondary structure element.
737#[derive(Debug, Clone, Copy, PartialEq, Eq)]
738pub enum SecStructType {
739    /// Alpha helix (HELIX record).
740    Helix,
741    /// Beta sheet (SHEET record).
742    Sheet,
743    /// Turn (TURN record).
744    Turn,
745}
746
747/// A secondary structure record (HELIX, SHEET, or TURN).
748#[derive(Debug, Clone)]
749pub struct SecStructRecord {
750    /// Type of secondary structure.
751    pub record_type: SecStructType,
752    /// Chain identifier.
753    pub chain_id: char,
754    /// Starting residue sequence number.
755    pub start_res: u32,
756    /// Ending residue sequence number.
757    pub end_res: u32,
758    /// Starting residue name.
759    pub start_res_name: String,
760    /// Ending residue name.
761    pub end_res_name: String,
762}
763
764/// Parse HELIX, SHEET, and TURN records from PDB content.
765pub fn parse_secondary_structure(content: &str) -> Vec<SecStructRecord> {
766    let mut records = Vec::new();
767
768    for line in content.lines() {
769        if line.starts_with("HELIX") {
770            if line.len() < 26 {
771                continue;
772            }
773            let chain_id = line.as_bytes().get(19).map_or('A', |&b| b as char);
774            let start_res_name = line.get(15..18).unwrap_or("   ").trim().to_string();
775            let start_res: u32 = line
776                .get(21..25)
777                .and_then(|s| s.trim().parse().ok())
778                .unwrap_or(0);
779            let end_res_name = line.get(27..30).unwrap_or("   ").trim().to_string();
780            let end_res: u32 = line
781                .get(33..37)
782                .and_then(|s| s.trim().parse().ok())
783                .unwrap_or(0);
784            records.push(SecStructRecord {
785                record_type: SecStructType::Helix,
786                chain_id,
787                start_res,
788                end_res,
789                start_res_name,
790                end_res_name,
791            });
792        } else if line.starts_with("SHEET") {
793            if line.len() < 22 {
794                continue;
795            }
796            let chain_id = line.as_bytes().get(21).map_or('A', |&b| b as char);
797            let start_res_name = line.get(17..20).unwrap_or("   ").trim().to_string();
798            let start_res: u32 = line
799                .get(22..26)
800                .and_then(|s| s.trim().parse().ok())
801                .unwrap_or(0);
802            let end_res_name = line.get(28..31).unwrap_or("   ").trim().to_string();
803            let end_res: u32 = line
804                .get(33..37)
805                .and_then(|s| s.trim().parse().ok())
806                .unwrap_or(0);
807            records.push(SecStructRecord {
808                record_type: SecStructType::Sheet,
809                chain_id,
810                start_res,
811                end_res,
812                start_res_name,
813                end_res_name,
814            });
815        } else if line.starts_with("TURN") {
816            if line.len() < 22 {
817                continue;
818            }
819            let chain_id = line.as_bytes().get(19).map_or('A', |&b| b as char);
820            let start_res_name = line.get(15..18).unwrap_or("   ").trim().to_string();
821            let start_res: u32 = line
822                .get(20..24)
823                .and_then(|s| s.trim().parse().ok())
824                .unwrap_or(0);
825            let end_res_name = line.get(26..29).unwrap_or("   ").trim().to_string();
826            let end_res: u32 = line
827                .get(30..34)
828                .and_then(|s| s.trim().parse().ok())
829                .unwrap_or(0);
830            records.push(SecStructRecord {
831                record_type: SecStructType::Turn,
832                chain_id,
833                start_res,
834                end_res,
835                start_res_name,
836                end_res_name,
837            });
838        }
839    }
840
841    records
842}
843
844// ---------------------------------------------------------------------------
845// Biological Assembly
846// ---------------------------------------------------------------------------
847
848/// A chain of atoms within a biological assembly.
849#[derive(Debug, Clone)]
850pub struct AssemblyChain {
851    /// Chain identifier.
852    pub chain_id: char,
853    /// All atoms in this chain.
854    pub atoms: Vec<PdbAtom>,
855}
856
857/// A biological assembly: one or more copies of the asymmetric unit
858/// arranged by crystallographic symmetry.
859#[derive(Debug, Clone)]
860pub struct BiologicalAssembly {
861    /// Assembly number (as in REMARK 350).
862    pub id: u32,
863    /// All chains in this assembly.
864    pub chains: Vec<AssemblyChain>,
865}
866
867impl BiologicalAssembly {
868    /// Build a biological assembly from a `PdbModel` using the identity transform.
869    pub fn from_model_with_identity(model: &PdbModel, id: u32) -> Self {
870        // Group atoms by chain
871        let mut chain_map: std::collections::HashMap<char, Vec<PdbAtom>> =
872            std::collections::HashMap::new();
873        for atom in &model.atoms {
874            chain_map
875                .entry(atom.chain_id)
876                .or_default()
877                .push(atom.clone());
878        }
879        let mut chains: Vec<AssemblyChain> = chain_map
880            .into_iter()
881            .map(|(chain_id, atoms)| AssemblyChain { chain_id, atoms })
882            .collect();
883        chains.sort_by_key(|c| c.chain_id);
884        Self { id, chains }
885    }
886
887    /// Apply a 4×3 rotation-translation matrix to all atoms.
888    ///
889    /// Matrix format: `[[r00,r01,r02,tx\],[r10,r11,r12,ty],[r20,r21,r22,tz]]`
890    pub fn apply_transform(&self, m: &[[f64; 4]; 3]) -> Self {
891        let transform_atom = |atom: &PdbAtom| -> PdbAtom {
892            let x = m[0][0] * atom.x + m[0][1] * atom.y + m[0][2] * atom.z + m[0][3];
893            let y = m[1][0] * atom.x + m[1][1] * atom.y + m[1][2] * atom.z + m[1][3];
894            let z = m[2][0] * atom.x + m[2][1] * atom.y + m[2][2] * atom.z + m[2][3];
895            let mut a = atom.clone();
896            a.x = x;
897            a.y = y;
898            a.z = z;
899            a
900        };
901        let chains = self
902            .chains
903            .iter()
904            .map(|c| AssemblyChain {
905                chain_id: c.chain_id,
906                atoms: c.atoms.iter().map(transform_atom).collect(),
907            })
908            .collect();
909        Self {
910            id: self.id,
911            chains,
912        }
913    }
914
915    /// Total number of atoms across all chains.
916    pub fn total_atoms(&self) -> usize {
917        self.chains.iter().map(|c| c.atoms.len()).sum()
918    }
919
920    /// All chain IDs present.
921    pub fn chain_ids(&self) -> Vec<char> {
922        self.chains.iter().map(|c| c.chain_id).collect()
923    }
924}
925
926// ---------------------------------------------------------------------------
927// Symmetry Operations
928// ---------------------------------------------------------------------------
929
930/// A crystallographic symmetry operation: rotation + translation.
931///
932/// Stored as a 3×4 matrix: `[[r00,r01,r02,tx\],[r10,r11,r12,ty],[r20,r21,r22,tz]]`.
933#[derive(Debug, Clone)]
934pub struct SymmetryOperation {
935    /// The 3×4 transformation matrix.
936    pub matrix: [[f64; 4]; 3],
937    /// Optional human-readable description (e.g., "X,Y,Z").
938    pub description: String,
939}
940
941impl SymmetryOperation {
942    /// The identity operation.
943    pub fn identity() -> Self {
944        Self {
945            matrix: [
946                [1.0, 0.0, 0.0, 0.0],
947                [0.0, 1.0, 0.0, 0.0],
948                [0.0, 0.0, 1.0, 0.0],
949            ],
950            description: "X,Y,Z".to_string(),
951        }
952    }
953
954    /// A pure translation.
955    pub fn translation(t: [f64; 3]) -> Self {
956        Self {
957            matrix: [
958                [1.0, 0.0, 0.0, t[0]],
959                [0.0, 1.0, 0.0, t[1]],
960                [0.0, 0.0, 1.0, t[2]],
961            ],
962            description: format!("T({},{},{})", t[0], t[1], t[2]),
963        }
964    }
965
966    /// Apply the symmetry operation to a 3D point.
967    pub fn apply(&self, p: [f64; 3]) -> [f64; 3] {
968        let m = &self.matrix;
969        [
970            m[0][0] * p[0] + m[0][1] * p[1] + m[0][2] * p[2] + m[0][3],
971            m[1][0] * p[0] + m[1][1] * p[1] + m[1][2] * p[2] + m[1][3],
972            m[2][0] * p[0] + m[2][1] * p[1] + m[2][2] * p[2] + m[2][3],
973        ]
974    }
975
976    /// Apply the symmetry operation to a list of atoms.
977    pub fn apply_to_atoms(&self, atoms: &[PdbAtom]) -> Vec<PdbAtom> {
978        atoms
979            .iter()
980            .map(|a| {
981                let [x, y, z] = self.apply([a.x, a.y, a.z]);
982                let mut new_a = a.clone();
983                new_a.x = x;
984                new_a.y = y;
985                new_a.z = z;
986                new_a
987            })
988            .collect()
989    }
990
991    /// Generate all symmetry-equivalent copies of the atoms.
992    pub fn generate_copies(ops: &[SymmetryOperation], atoms: &[PdbAtom]) -> Vec<Vec<PdbAtom>> {
993        ops.iter().map(|op| op.apply_to_atoms(atoms)).collect()
994    }
995}
996
997/// Parse REMARK 290 crystallographic symmetry operations from PDB content.
998pub fn parse_symmetry_operations(content: &str) -> Vec<SymmetryOperation> {
999    let mut ops = Vec::new();
1000    let mut in_remark_290 = false;
1001    let mut current_matrix: Vec<[f64; 4]> = Vec::new();
1002
1003    for line in content.lines() {
1004        if line.starts_with("REMARK 290") {
1005            in_remark_290 = true;
1006            // Look for SMTRY lines
1007            if line.contains("SMTRY") {
1008                let parts: Vec<&str> = line.split_whitespace().collect();
1009                if parts.len() >= 8
1010                    && let (Ok(r1), Ok(r2), Ok(r3), Ok(t)) = (
1011                        parts[4].parse::<f64>(),
1012                        parts[5].parse::<f64>(),
1013                        parts[6].parse::<f64>(),
1014                        parts[7].parse::<f64>(),
1015                    )
1016                {
1017                    current_matrix.push([r1, r2, r3, t]);
1018                    if current_matrix.len() == 3 {
1019                        ops.push(SymmetryOperation {
1020                            matrix: [current_matrix[0], current_matrix[1], current_matrix[2]],
1021                            description: String::new(),
1022                        });
1023                        current_matrix.clear();
1024                    }
1025                }
1026            }
1027        } else if in_remark_290 && !line.starts_with("REMARK") {
1028            in_remark_290 = false;
1029        }
1030    }
1031
1032    // Always include identity if no ops found
1033    if ops.is_empty() {
1034        ops.push(SymmetryOperation::identity());
1035    }
1036    ops
1037}
1038
1039// ---------------------------------------------------------------------------
1040// PDB Validation
1041// ---------------------------------------------------------------------------
1042
1043/// Result of validating a PDB model.
1044#[derive(Debug, Clone, Default)]
1045pub struct PdbValidationResult {
1046    /// Fatal errors (model should not be used as-is).
1047    pub errors: Vec<String>,
1048    /// Warnings (model is usable but has issues).
1049    pub warnings: Vec<String>,
1050}
1051
1052impl PdbValidationResult {
1053    /// Whether validation passed with no errors.
1054    pub fn is_valid(&self) -> bool {
1055        self.errors.is_empty()
1056    }
1057
1058    /// Whether there are any issues (errors or warnings).
1059    pub fn has_issues(&self) -> bool {
1060        !self.errors.is_empty() || !self.warnings.is_empty()
1061    }
1062}
1063
1064/// Validate a `PdbModel` for common errors and warnings.
1065///
1066/// Checks:
1067/// - Duplicate atom serial numbers (error)
1068/// - Atoms with extreme coordinates (warning)
1069/// - Missing occupancy (warning)
1070/// - Empty model (warning)
1071pub fn validate_pdb_model(model: &PdbModel) -> PdbValidationResult {
1072    let mut result = PdbValidationResult::default();
1073
1074    if model.atoms.is_empty() {
1075        result.warnings.push("model has no atoms".to_string());
1076        return result;
1077    }
1078
1079    // Check duplicate serial numbers
1080    let mut seen_serials = std::collections::HashSet::new();
1081    for atom in &model.atoms {
1082        if !seen_serials.insert(atom.serial) {
1083            result
1084                .errors
1085                .push(format!("duplicate atom serial number: {}", atom.serial));
1086        }
1087    }
1088
1089    // Check extreme coordinates
1090    for atom in &model.atoms {
1091        let max_coord = atom.x.abs().max(atom.y.abs()).max(atom.z.abs());
1092        if max_coord > 9999.0 {
1093            result.warnings.push(format!(
1094                "atom {} has extreme coordinate ({:.1}, {:.1}, {:.1})",
1095                atom.serial, atom.x, atom.y, atom.z
1096            ));
1097        }
1098    }
1099
1100    // Check occupancy values
1101    for atom in &model.atoms {
1102        if atom.occupancy < 0.0 || atom.occupancy > 1.0 {
1103            result.warnings.push(format!(
1104                "atom {} has out-of-range occupancy: {}",
1105                atom.serial, atom.occupancy
1106            ));
1107        }
1108    }
1109
1110    // Check B-factors
1111    for atom in &model.atoms {
1112        if atom.temp_factor < 0.0 {
1113            result.warnings.push(format!(
1114                "atom {} has negative B-factor: {}",
1115                atom.serial, atom.temp_factor
1116            ));
1117        }
1118    }
1119
1120    result
1121}
1122
1123/// Validate a full multi-model PDB file.
1124pub fn validate_pdb_file(pdb_file: &PdbFile) -> Vec<PdbValidationResult> {
1125    pdb_file.models.iter().map(validate_pdb_model).collect()
1126}
1127
1128#[cfg(test)]
1129mod tests {
1130    use super::*;
1131
1132    fn make_atom(
1133        serial: u32,
1134        name: &str,
1135        res: &str,
1136        chain: char,
1137        x: f64,
1138        y: f64,
1139        z: f64,
1140    ) -> PdbAtom {
1141        PdbAtom::new(PdbAtomData {
1142            serial,
1143            name: name.to_string(),
1144            res_name: res.to_string(),
1145            chain_id: chain,
1146            res_seq: serial,
1147            x,
1148            y,
1149            z,
1150            occupancy: 1.0,
1151            temp_factor: 0.0,
1152            element: String::new(),
1153        })
1154    }
1155
1156    #[test]
1157    fn test_pdb_write_and_read_roundtrip() {
1158        let path = std::env::temp_dir().join("oxiphy_test.pdb");
1159        let atoms = vec![
1160            make_atom(1, "CA", "ALA", 'A', 1.234, 5.678, 9.012),
1161            make_atom(2, "N", "GLY", 'A', -1.0, 2.5, 3.75),
1162        ];
1163        PdbWriter::write_frame(path.to_str().unwrap_or(""), &atoms).unwrap();
1164        let read_atoms = PdbReader::read(path.to_str().unwrap_or("")).unwrap();
1165        assert_eq!(read_atoms.len(), 2);
1166        assert!((read_atoms[0].x - 1.234).abs() < 0.01);
1167        assert!((read_atoms[1].z - 3.75).abs() < 0.01);
1168        assert_eq!(read_atoms[0].name, "CA");
1169        assert_eq!(read_atoms[1].res_name, "GLY");
1170        std::fs::remove_file(&path).ok();
1171    }
1172
1173    #[test]
1174    fn test_pdb_write_read_roundtrip() {
1175        let path = std::env::temp_dir().join("oxiphy_test_rt3.pdb");
1176        let atoms = vec![
1177            make_atom(1, "N", "ALA", 'A', 10.111, 20.222, 30.333),
1178            make_atom(2, "CA", "ALA", 'A', -5.555, 0.001, 15.999),
1179            make_atom(3, "C", "GLY", 'B', 0.0, 0.0, 0.0),
1180        ];
1181        PdbWriter::write_frame(path.to_str().unwrap_or(""), &atoms).unwrap();
1182        let read_atoms = PdbReader::read(path.to_str().unwrap_or("")).unwrap();
1183
1184        assert_eq!(read_atoms.len(), 3, "expected 3 atoms");
1185
1186        assert!((read_atoms[0].x - 10.111).abs() < 0.001);
1187        assert!((read_atoms[0].y - 20.222).abs() < 0.001);
1188        assert!((read_atoms[0].z - 30.333).abs() < 0.001);
1189        assert!((read_atoms[1].x - (-5.555)).abs() < 0.001);
1190        assert!((read_atoms[1].y - 0.001).abs() < 0.001);
1191        assert!((read_atoms[1].z - 15.999).abs() < 0.001);
1192        assert!((read_atoms[2].x - 0.0).abs() < 0.001);
1193        assert!((read_atoms[2].y - 0.0).abs() < 0.001);
1194        assert!((read_atoms[2].z - 0.0).abs() < 0.001);
1195
1196        std::fs::remove_file(&path).ok();
1197    }
1198
1199    #[test]
1200    fn test_pdb_column_widths() {
1201        let path = std::env::temp_dir().join("oxiphy_test_cols.pdb");
1202        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)];
1203        PdbWriter::write_frame(path.to_str().unwrap_or(""), &atoms).unwrap();
1204        let content = std::fs::read_to_string(&path).unwrap();
1205        let line = content.lines().next().unwrap();
1206        assert!(line.starts_with("ATOM  "));
1207        let x_str = &line[30..38];
1208        assert!(x_str.trim().parse::<f64>().is_ok());
1209        std::fs::remove_file(&path).ok();
1210    }
1211
1212    #[test]
1213    fn test_parse_pdb_minimal() {
1214        let pdb_str = "\
1215TITLE     Test molecule
1216REMARK Test remark
1217ATOM      1  CA  ALA A   1       1.234   5.678   9.012  1.00  0.00           C
1218ATOM      2  N   GLY A   2      -1.000   2.500   3.750  1.00  0.00           N
1219END
1220";
1221        let model = parse_pdb(pdb_str).unwrap();
1222        assert_eq!(model.atoms.len(), 2);
1223        assert_eq!(model.title, "Test molecule");
1224        assert_eq!(model.remarks.len(), 1);
1225
1226        let a0 = &model.atoms[0];
1227        assert_eq!(a0.name, "CA");
1228        assert_eq!(a0.res_name, "ALA");
1229        assert!((a0.x - 1.234).abs() < 0.001);
1230        assert!((a0.y - 5.678).abs() < 0.001);
1231        assert!((a0.z - 9.012).abs() < 0.001);
1232
1233        let a1 = &model.atoms[1];
1234        assert_eq!(a1.name, "N");
1235        assert!((a1.x - (-1.0)).abs() < 0.001);
1236        assert!((a1.z - 3.75).abs() < 0.001);
1237    }
1238
1239    #[test]
1240    fn test_parse_write_parse_roundtrip() {
1241        let pdb_str = "\
1242TITLE     Roundtrip test
1243ATOM      1  CA  ALA A   1      10.000  20.000  30.000  1.00  5.00
1244ATOM      2  N   GLY B   2      -5.000   0.500  15.000  0.50 10.00
1245END
1246";
1247        let model1 = parse_pdb(pdb_str).unwrap();
1248        let written = write_pdb(&model1);
1249        let model2 = parse_pdb(&written).unwrap();
1250
1251        assert_eq!(model1.atoms.len(), model2.atoms.len());
1252        for (a, b) in model1.atoms.iter().zip(model2.atoms.iter()) {
1253            assert!((a.x - b.x).abs() < 0.001);
1254            assert!((a.y - b.y).abs() < 0.001);
1255            assert!((a.z - b.z).abs() < 0.001);
1256        }
1257    }
1258
1259    // ─── New tests ────────────────────────────────────────────────────
1260
1261    #[test]
1262    fn test_hetatm_parsing() {
1263        let pdb_str = "\
1264HETATM    1  O   HOH A   1       1.000   2.000   3.000  1.00  0.00           O
1265END
1266";
1267        let model = parse_pdb(pdb_str).unwrap();
1268        assert_eq!(model.atoms.len(), 1);
1269        assert!(model.atoms[0].is_hetatm);
1270        assert_eq!(model.atoms[0].res_name, "HOH");
1271    }
1272
1273    #[test]
1274    fn test_hetatm_write_roundtrip() {
1275        let mut atom = make_atom(1, "O", "HOH", 'A', 1.0, 2.0, 3.0);
1276        atom.is_hetatm = true;
1277        let model = PdbModel {
1278            atoms: vec![atom],
1279            remarks: Vec::new(),
1280            title: String::new(),
1281            conect_records: Vec::new(),
1282            cryst1: None,
1283            model_number: None,
1284        };
1285        let written = write_pdb(&model);
1286        assert!(written.contains("HETATM"));
1287        let parsed = parse_pdb(&written).unwrap();
1288        assert!(parsed.atoms[0].is_hetatm);
1289    }
1290
1291    #[test]
1292    fn test_conect_record_parsing() {
1293        let pdb_str = "\
1294ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00           C
1295ATOM      2  N   ALA A   1       4.000   5.000   6.000  1.00  0.00           N
1296CONECT    1    2
1297END
1298";
1299        let model = parse_pdb(pdb_str).unwrap();
1300        assert_eq!(model.conect_records.len(), 1);
1301        assert_eq!(model.conect_records[0].atom_serial, 1);
1302        assert_eq!(model.conect_records[0].bonded_atoms, vec![2]);
1303    }
1304
1305    #[test]
1306    fn test_conect_multiple_bonds() {
1307        let line = "CONECT    1    2    3    4";
1308        let conect = parse_conect_line(line).unwrap();
1309        assert_eq!(conect.atom_serial, 1);
1310        assert_eq!(conect.bonded_atoms, vec![2, 3, 4]);
1311    }
1312
1313    #[test]
1314    fn test_conect_write_roundtrip() {
1315        let model = PdbModel {
1316            atoms: vec![
1317                make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1318                make_atom(2, "N", "ALA", 'A', 2.0, 0.0, 0.0),
1319            ],
1320            remarks: Vec::new(),
1321            title: String::new(),
1322            conect_records: vec![ConectRecord {
1323                atom_serial: 1,
1324                bonded_atoms: vec![2],
1325            }],
1326            cryst1: None,
1327            model_number: None,
1328        };
1329        let written = write_pdb(&model);
1330        assert!(written.contains("CONECT"));
1331        let parsed = parse_pdb(&written).unwrap();
1332        assert_eq!(parsed.conect_records.len(), 1);
1333        assert_eq!(parsed.conect_records[0].atom_serial, 1);
1334    }
1335
1336    #[test]
1337    fn test_cryst1_parsing() {
1338        let pdb_str = "\
1339CRYST1   50.000   60.000   70.000  90.00  90.00  90.00 P 1           1
1340ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1341END
1342";
1343        let model = parse_pdb(pdb_str).unwrap();
1344        let cryst = model.cryst1.unwrap();
1345        assert!((cryst.a - 50.0).abs() < 0.01);
1346        assert!((cryst.b - 60.0).abs() < 0.01);
1347        assert!((cryst.c - 70.0).abs() < 0.01);
1348        assert!((cryst.alpha - 90.0).abs() < 0.01);
1349    }
1350
1351    #[test]
1352    fn test_cryst1_write_roundtrip() {
1353        let model = PdbModel {
1354            atoms: vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)],
1355            remarks: Vec::new(),
1356            title: String::new(),
1357            conect_records: Vec::new(),
1358            cryst1: Some(Cryst1Record {
1359                a: 50.0,
1360                b: 60.0,
1361                c: 70.0,
1362                alpha: 90.0,
1363                beta: 90.0,
1364                gamma: 90.0,
1365                space_group: "P 1".to_string(),
1366                z_value: 1,
1367            }),
1368            model_number: None,
1369        };
1370        let written = write_pdb(&model);
1371        assert!(written.contains("CRYST1"));
1372        let parsed = parse_pdb(&written).unwrap();
1373        let cryst = parsed.cryst1.unwrap();
1374        assert!((cryst.a - 50.0).abs() < 0.01);
1375    }
1376
1377    #[test]
1378    fn test_multi_model_pdb() {
1379        let pdb_str = "\
1380TITLE     Multi model test
1381MODEL        1
1382ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1383ENDMDL
1384MODEL        2
1385ATOM      1  CA  ALA A   1       4.000   5.000   6.000  1.00  0.00
1386ENDMDL
1387END
1388";
1389        let pdb_file = parse_multi_model_pdb(pdb_str).unwrap();
1390        assert_eq!(pdb_file.models.len(), 2);
1391        assert_eq!(pdb_file.title, "Multi model test");
1392        assert!((pdb_file.models[0].atoms[0].x - 1.0).abs() < 0.01);
1393        assert!((pdb_file.models[1].atoms[0].x - 4.0).abs() < 0.01);
1394        assert_eq!(pdb_file.models[0].model_number, Some(1));
1395        assert_eq!(pdb_file.models[1].model_number, Some(2));
1396    }
1397
1398    #[test]
1399    fn test_multi_model_write_roundtrip() {
1400        let pdb_file = PdbFile {
1401            models: vec![
1402                PdbModel {
1403                    atoms: vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)],
1404                    remarks: Vec::new(),
1405                    title: String::new(),
1406                    conect_records: Vec::new(),
1407                    cryst1: None,
1408                    model_number: Some(1),
1409                },
1410                PdbModel {
1411                    atoms: vec![make_atom(1, "CA", "ALA", 'A', 4.0, 5.0, 6.0)],
1412                    remarks: Vec::new(),
1413                    title: String::new(),
1414                    conect_records: Vec::new(),
1415                    cryst1: None,
1416                    model_number: Some(2),
1417                },
1418            ],
1419            remarks: Vec::new(),
1420            title: "Test".to_string(),
1421            cryst1: None,
1422        };
1423        let written = write_multi_model_pdb(&pdb_file);
1424        assert!(written.contains("MODEL"));
1425        assert!(written.contains("ENDMDL"));
1426        let parsed = parse_multi_model_pdb(&written).unwrap();
1427        assert_eq!(parsed.models.len(), 2);
1428    }
1429
1430    #[test]
1431    fn test_single_model_as_multi() {
1432        let pdb_str = "\
1433ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1434END
1435";
1436        let pdb_file = parse_multi_model_pdb(pdb_str).unwrap();
1437        assert_eq!(pdb_file.models.len(), 1);
1438    }
1439
1440    #[test]
1441    fn test_occupancy_and_bfactor() {
1442        let pdb_str = "\
1443ATOM      1  CA  ALA A   1       1.000   2.000   3.000  0.75 25.00
1444END
1445";
1446        let model = parse_pdb(pdb_str).unwrap();
1447        assert!((model.atoms[0].occupancy - 0.75).abs() < 0.01);
1448        assert!((model.atoms[0].temp_factor - 25.0).abs() < 0.01);
1449    }
1450
1451    #[test]
1452    fn test_occupancy_bfactor_roundtrip() {
1453        let atom = PdbAtom::new(PdbAtomData {
1454            serial: 1,
1455            name: "CA".into(),
1456            res_name: "ALA".into(),
1457            chain_id: 'A',
1458            res_seq: 1,
1459            x: 1.0,
1460            y: 2.0,
1461            z: 3.0,
1462            occupancy: 0.50,
1463            temp_factor: 30.0,
1464            element: "C".into(),
1465        });
1466        let model = PdbModel {
1467            atoms: vec![atom],
1468            remarks: Vec::new(),
1469            title: String::new(),
1470            conect_records: Vec::new(),
1471            cryst1: None,
1472            model_number: None,
1473        };
1474        let written = write_pdb(&model);
1475        let parsed = parse_pdb(&written).unwrap();
1476        assert!((parsed.atoms[0].occupancy - 0.50).abs() < 0.01);
1477        assert!((parsed.atoms[0].temp_factor - 30.0).abs() < 0.01);
1478    }
1479
1480    #[test]
1481    fn test_filter_by_chain() {
1482        let atoms = vec![
1483            make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1484            make_atom(2, "CA", "GLY", 'B', 2.0, 0.0, 0.0),
1485            make_atom(3, "N", "ALA", 'A', 3.0, 0.0, 0.0),
1486        ];
1487        let chain_a = filter_by_chain(&atoms, 'A');
1488        assert_eq!(chain_a.len(), 2);
1489        let chain_b = filter_by_chain(&atoms, 'B');
1490        assert_eq!(chain_b.len(), 1);
1491        let chain_c = filter_by_chain(&atoms, 'C');
1492        assert!(chain_c.is_empty());
1493    }
1494
1495    #[test]
1496    fn test_unique_chain_ids() {
1497        let atoms = vec![
1498            make_atom(1, "CA", "ALA", 'B', 0.0, 0.0, 0.0),
1499            make_atom(2, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1500            make_atom(3, "CA", "ALA", 'B', 0.0, 0.0, 0.0),
1501        ];
1502        let chains = unique_chain_ids(&atoms);
1503        assert_eq!(chains, vec!['A', 'B']);
1504    }
1505
1506    #[test]
1507    fn test_unique_residue_names() {
1508        let atoms = vec![
1509            make_atom(1, "CA", "GLY", 'A', 0.0, 0.0, 0.0),
1510            make_atom(2, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1511            make_atom(3, "CA", "GLY", 'A', 0.0, 0.0, 0.0),
1512        ];
1513        let names = unique_residue_names(&atoms);
1514        assert_eq!(names, vec!["ALA", "GLY"]);
1515    }
1516
1517    #[test]
1518    fn test_center_of_mass() {
1519        let atoms = vec![
1520            make_atom(1, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1521            make_atom(2, "CA", "ALA", 'A', 10.0, 0.0, 0.0),
1522        ];
1523        let com = center_of_mass(&atoms);
1524        assert!((com[0] - 5.0).abs() < 1e-10);
1525        assert!(com[1].abs() < 1e-10);
1526    }
1527
1528    #[test]
1529    fn test_center_of_mass_empty() {
1530        let com = center_of_mass(&[]);
1531        assert_eq!(com, [0.0, 0.0, 0.0]);
1532    }
1533
1534    #[test]
1535    fn test_atom_distance() {
1536        let a = make_atom(1, "CA", "ALA", 'A', 0.0, 0.0, 0.0);
1537        let b = make_atom(2, "CA", "ALA", 'A', 3.0, 4.0, 0.0);
1538        assert!((a.distance_to(&b) - 5.0).abs() < 1e-10);
1539    }
1540
1541    #[test]
1542    fn test_cryst1_default() {
1543        let cryst = Cryst1Record::default();
1544        assert!((cryst.a - 1.0).abs() < 1e-10);
1545        assert!((cryst.alpha - 90.0).abs() < 1e-10);
1546        assert_eq!(cryst.space_group, "P 1");
1547    }
1548
1549    #[test]
1550    fn test_conect_not_conect() {
1551        assert!(parse_conect_line("ATOM  stuff").is_none());
1552    }
1553
1554    #[test]
1555    fn test_cryst1_not_cryst1() {
1556        assert!(parse_cryst1_line("ATOM  stuff").is_none());
1557    }
1558
1559    #[test]
1560    fn test_parse_atom_line_too_short() {
1561        assert!(parse_atom_line("ATOM  short").is_none());
1562    }
1563
1564    // ── SEQRES tests ─────────────────────────────────────────────────────
1565
1566    #[test]
1567    fn test_seqres_parsing_basic() {
1568        let pdb_str = "\
1569SEQRES   1 A   10  ALA GLY VAL LEU ILE PRO PHE TRP MET SER
1570END
1571";
1572        let records = parse_seqres(pdb_str);
1573        assert_eq!(records.len(), 1);
1574        assert_eq!(records[0].chain_id, 'A');
1575        assert_eq!(records[0].num_residues, 10);
1576        assert_eq!(records[0].residues.len(), 10);
1577        assert_eq!(records[0].residues[0], "ALA");
1578    }
1579
1580    #[test]
1581    fn test_seqres_multi_chain() {
1582        let pdb_str = "\
1583SEQRES   1 A    5  ALA GLY VAL LEU ILE
1584SEQRES   1 B    3  TRP MET SER
1585END
1586";
1587        let records = parse_seqres(pdb_str);
1588        assert_eq!(records.len(), 2);
1589        let chains: Vec<char> = records.iter().map(|r| r.chain_id).collect();
1590        assert!(chains.contains(&'A'));
1591        assert!(chains.contains(&'B'));
1592    }
1593
1594    #[test]
1595    fn test_seqres_sequence_string() {
1596        let pdb_str = "\
1597SEQRES   1 A    3  ALA GLY VAL
1598END
1599";
1600        let records = parse_seqres(pdb_str);
1601        let seq = records[0].sequence_string();
1602        assert_eq!(seq, "ALA-GLY-VAL");
1603    }
1604
1605    // ── Secondary structure tests ──────────────────────────────────────
1606
1607    #[test]
1608    fn test_helix_record_parsing() {
1609        let pdb_str = "\
1610HELIX    1   1 ALA A    1  GLY A   10  1                                  10
1611END
1612";
1613        let records = parse_secondary_structure(pdb_str);
1614        let helices: Vec<_> = records
1615            .iter()
1616            .filter(|r| r.record_type == SecStructType::Helix)
1617            .collect();
1618        assert_eq!(helices.len(), 1);
1619        assert_eq!(helices[0].chain_id, 'A');
1620        assert_eq!(helices[0].start_res, 1);
1621        assert_eq!(helices[0].end_res, 10);
1622    }
1623
1624    #[test]
1625    fn test_sheet_record_parsing() {
1626        let pdb_str = "\
1627SHEET    1   A 2 ALA A   1  GLY A   5  0
1628END
1629";
1630        let records = parse_secondary_structure(pdb_str);
1631        let sheets: Vec<_> = records
1632            .iter()
1633            .filter(|r| r.record_type == SecStructType::Sheet)
1634            .collect();
1635        assert_eq!(sheets.len(), 1);
1636    }
1637
1638    #[test]
1639    fn test_turn_record_parsing() {
1640        let pdb_str = "\
1641TURN     1 T1  ALA A    1  GLY A    4
1642END
1643";
1644        let records = parse_secondary_structure(pdb_str);
1645        let turns: Vec<_> = records
1646            .iter()
1647            .filter(|r| r.record_type == SecStructType::Turn)
1648            .collect();
1649        assert_eq!(turns.len(), 1);
1650    }
1651
1652    // ── Biological assembly tests ──────────────────────────────────────
1653
1654    #[test]
1655    fn test_biological_assembly_construction() {
1656        let atoms = vec![
1657            make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1658            make_atom(2, "CA", "GLY", 'A', 2.0, 0.0, 0.0),
1659        ];
1660        let model = PdbModel {
1661            atoms: atoms.clone(),
1662            remarks: Vec::new(),
1663            title: String::new(),
1664            conect_records: Vec::new(),
1665            cryst1: None,
1666            model_number: None,
1667        };
1668        let assembly = BiologicalAssembly::from_model_with_identity(&model, 1);
1669        assert_eq!(assembly.id, 1);
1670        assert_eq!(assembly.chains.len(), 1); // chain 'A'
1671        assert_eq!(assembly.chains[0].atoms.len(), 2);
1672    }
1673
1674    #[test]
1675    fn test_biological_assembly_apply_transform() {
1676        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0)];
1677        let model = PdbModel {
1678            atoms,
1679            remarks: Vec::new(),
1680            title: String::new(),
1681            conect_records: Vec::new(),
1682            cryst1: None,
1683            model_number: None,
1684        };
1685        let assembly = BiologicalAssembly::from_model_with_identity(&model, 1);
1686        // Apply translation [1,0,0] → atom moves to [2,0,0]
1687        let transform = [
1688            [1.0, 0.0, 0.0, 1.0],
1689            [0.0, 1.0, 0.0, 0.0],
1690            [0.0, 0.0, 1.0, 0.0],
1691        ];
1692        let transformed = assembly.apply_transform(&transform);
1693        assert!((transformed.chains[0].atoms[0].x - 2.0).abs() < 1e-6);
1694    }
1695
1696    // ── Symmetry operations tests ──────────────────────────────────────
1697
1698    #[test]
1699    fn test_symmetry_operation_identity() {
1700        let sym = SymmetryOperation::identity();
1701        let pt = [1.0, 2.0, 3.0];
1702        let result = sym.apply(pt);
1703        assert!((result[0] - 1.0).abs() < 1e-10);
1704        assert!((result[1] - 2.0).abs() < 1e-10);
1705        assert!((result[2] - 3.0).abs() < 1e-10);
1706    }
1707
1708    #[test]
1709    fn test_symmetry_operation_translation() {
1710        let sym = SymmetryOperation::translation([5.0, 0.0, 0.0]);
1711        let pt = [1.0, 2.0, 3.0];
1712        let result = sym.apply(pt);
1713        assert!((result[0] - 6.0).abs() < 1e-10);
1714        assert!((result[1] - 2.0).abs() < 1e-10);
1715        assert!((result[2] - 3.0).abs() < 1e-10);
1716    }
1717
1718    #[test]
1719    fn test_symmetry_apply_to_atoms() {
1720        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)];
1721        let sym = SymmetryOperation::translation([10.0, 0.0, 0.0]);
1722        let moved = sym.apply_to_atoms(&atoms);
1723        assert!((moved[0].x - 11.0).abs() < 1e-6);
1724    }
1725
1726    // ── PDB validation tests ───────────────────────────────────────────
1727
1728    #[test]
1729    fn test_pdb_validate_valid_model() {
1730        let atoms = vec![
1731            make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0),
1732            make_atom(2, "N", "ALA", 'A', 2.0, 3.0, 4.0),
1733        ];
1734        let model = PdbModel {
1735            atoms,
1736            remarks: Vec::new(),
1737            title: "Test".to_string(),
1738            conect_records: Vec::new(),
1739            cryst1: None,
1740            model_number: None,
1741        };
1742        let result = validate_pdb_model(&model);
1743        assert!(result.is_valid(), "valid model should pass validation");
1744    }
1745
1746    #[test]
1747    fn test_pdb_validate_duplicate_serial() {
1748        let mut a1 = make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0);
1749        let mut a2 = make_atom(1, "N", "ALA", 'A', 2.0, 0.0, 0.0); // duplicate serial
1750        a1.serial = 1;
1751        a2.serial = 1;
1752        let model = PdbModel {
1753            atoms: vec![a1, a2],
1754            remarks: Vec::new(),
1755            title: String::new(),
1756            conect_records: Vec::new(),
1757            cryst1: None,
1758            model_number: None,
1759        };
1760        let result = validate_pdb_model(&model);
1761        assert!(
1762            !result.is_valid(),
1763            "duplicate serials should fail validation"
1764        );
1765        assert!(!result.errors.is_empty());
1766    }
1767
1768    #[test]
1769    fn test_pdb_validate_extreme_coordinates() {
1770        let atom = PdbAtom {
1771            serial: 1,
1772            name: "CA".to_string(),
1773            res_name: "ALA".to_string(),
1774            chain_id: 'A',
1775            res_seq: 1,
1776            x: 1e10, // unreasonably large
1777            y: 0.0,
1778            z: 0.0,
1779            occupancy: 1.0,
1780            temp_factor: 0.0,
1781            element: "C".to_string(),
1782            is_hetatm: false,
1783        };
1784        let model = PdbModel {
1785            atoms: vec![atom],
1786            remarks: Vec::new(),
1787            title: String::new(),
1788            conect_records: Vec::new(),
1789            cryst1: None,
1790            model_number: None,
1791        };
1792        let result = validate_pdb_model(&model);
1793        assert!(
1794            result.warnings.iter().any(|w| w.contains("coordinate")),
1795            "extreme coordinate should generate a warning"
1796        );
1797    }
1798
1799    #[test]
1800    fn test_pdb_validate_empty_model() {
1801        let model = PdbModel {
1802            atoms: Vec::new(),
1803            remarks: Vec::new(),
1804            title: String::new(),
1805            conect_records: Vec::new(),
1806            cryst1: None,
1807            model_number: None,
1808        };
1809        let result = validate_pdb_model(&model);
1810        assert!(
1811            result.warnings.iter().any(|w| w.contains("no atoms")),
1812            "empty model should warn about missing atoms"
1813        );
1814    }
1815}