Skip to main content

oxiphysics_io/
pdb.rs

1#![allow(clippy::manual_strip)]
2// Copyright 2026 COOLJAPAN OU (Team KitaSan)
3// SPDX-License-Identifier: Apache-2.0
4
5//! PDB (Protein Data Bank) format writer and reader.
6//!
7//! Supports ATOM, HETATM, CONECT, CRYST1, multi-MODEL PDB files,
8//! occupancy/B-factor handling, and chain ID operations.
9
10use crate::{Error, Result};
11use oxiphysics_core::math::Vec3;
12use std::fs::File;
13use std::io::{BufRead, BufReader, BufWriter, Write};
14use std::path::Path;
15
16/// Represents a single atom record in PDB format.
17#[derive(Debug, Clone)]
18pub struct PdbAtom {
19    /// Atom serial number.
20    pub serial: u32,
21    /// Atom name (e.g., "CA", "N").
22    pub name: String,
23    /// Residue name (e.g., "ALA", "GLY").
24    pub res_name: String,
25    /// Chain identifier (single character, e.g., 'A').
26    pub chain_id: char,
27    /// Residue sequence number.
28    pub res_seq: u32,
29    /// X coordinate in angstroms.
30    pub x: f64,
31    /// Y coordinate in angstroms.
32    pub y: f64,
33    /// Z coordinate in angstroms.
34    pub z: f64,
35    /// Occupancy factor.
36    pub occupancy: f64,
37    /// Temperature (B) factor.
38    pub temp_factor: f64,
39    /// Element symbol.
40    pub element: String,
41    /// Whether this is a HETATM record (vs ATOM).
42    pub is_hetatm: bool,
43}
44
45impl PdbAtom {
46    /// Create a `PdbAtom` from all required fields.
47    #[allow(clippy::too_many_arguments)]
48    pub fn new(
49        serial: u32,
50        name: String,
51        res_name: String,
52        chain_id: char,
53        res_seq: u32,
54        x: f64,
55        y: f64,
56        z: f64,
57        occupancy: f64,
58        temp_factor: f64,
59        element: String,
60    ) -> Self {
61        PdbAtom {
62            serial,
63            name,
64            res_name,
65            chain_id,
66            res_seq,
67            x,
68            y,
69            z,
70            occupancy,
71            temp_factor,
72            element,
73            is_hetatm: false,
74        }
75    }
76
77    /// Return position as a `Vec3`.
78    pub fn position(&self) -> Vec3 {
79        Vec3::new(self.x, self.y, self.z)
80    }
81
82    /// Compute distance to another atom.
83    #[allow(dead_code)]
84    pub fn distance_to(&self, other: &PdbAtom) -> f64 {
85        let dx = self.x - other.x;
86        let dy = self.y - other.y;
87        let dz = self.z - other.z;
88        (dx * dx + dy * dy + dz * dz).sqrt()
89    }
90}
91
92/// A CONECT record: bonds between atoms.
93#[derive(Debug, Clone)]
94#[allow(dead_code)]
95pub struct ConectRecord {
96    /// Serial number of the atom.
97    pub atom_serial: u32,
98    /// Serial numbers of bonded atoms.
99    pub bonded_atoms: Vec<u32>,
100}
101
102/// A CRYST1 record: unit cell parameters.
103#[derive(Debug, Clone)]
104#[allow(dead_code)]
105pub struct Cryst1Record {
106    /// Unit cell lengths a, b, c in angstroms.
107    pub a: f64,
108    /// Unit cell length b.
109    pub b: f64,
110    /// Unit cell length c.
111    pub c: f64,
112    /// Unit cell angle alpha in degrees.
113    pub alpha: f64,
114    /// Unit cell angle beta in degrees.
115    pub beta: f64,
116    /// Unit cell angle gamma in degrees.
117    pub gamma: f64,
118    /// Space group.
119    pub space_group: String,
120    /// Z value (number of molecules per unit cell).
121    pub z_value: u32,
122}
123
124impl Default for Cryst1Record {
125    fn default() -> Self {
126        Cryst1Record {
127            a: 1.0,
128            b: 1.0,
129            c: 1.0,
130            alpha: 90.0,
131            beta: 90.0,
132            gamma: 90.0,
133            space_group: "P 1".to_string(),
134            z_value: 1,
135        }
136    }
137}
138
139/// A PDB model: a collection of atoms with optional remarks, title, and metadata.
140#[derive(Debug, Clone)]
141#[allow(dead_code)]
142pub struct PdbModel {
143    /// All ATOM/HETATM records.
144    pub atoms: Vec<PdbAtom>,
145    /// REMARK lines (text after the "REMARK" keyword).
146    pub remarks: Vec<String>,
147    /// TITLE line content.
148    pub title: String,
149    /// CONECT records.
150    pub conect_records: Vec<ConectRecord>,
151    /// CRYST1 record (unit cell).
152    pub cryst1: Option<Cryst1Record>,
153    /// Model number (for multi-model PDB files).
154    pub model_number: Option<u32>,
155}
156
157/// A multi-model PDB file.
158#[derive(Debug, Clone)]
159#[allow(dead_code)]
160pub struct PdbFile {
161    /// All models in the PDB file.
162    pub models: Vec<PdbModel>,
163    /// File-level remarks.
164    pub remarks: Vec<String>,
165    /// File-level title.
166    pub title: String,
167    /// File-level CRYST1 record.
168    pub cryst1: Option<Cryst1Record>,
169}
170
171/// Parse a single ATOM or HETATM line into a [`PdbAtom`].
172///
173/// Returns `None` if the line is not an ATOM/HETATM record or is too short to parse.
174#[allow(dead_code)]
175pub fn parse_atom_line(line: &str) -> Option<PdbAtom> {
176    let record = &line[..line.len().min(6)];
177    let is_hetatm = record == "HETATM";
178    if record != "ATOM  " && !is_hetatm {
179        return None;
180    }
181    if line.len() < 54 {
182        return None;
183    }
184
185    let serial = line.get(6..11)?.trim().parse::<u32>().ok()?;
186    let name = line.get(12..16)?.trim().to_string();
187    let res_name = line.get(17..20)?.trim().to_string();
188    let chain_id = line.as_bytes().get(21).map_or('A', |&b| b as char);
189    let res_seq = line
190        .get(22..26)
191        .and_then(|s| s.trim().parse::<u32>().ok())
192        .unwrap_or(1);
193    let x = line.get(30..38)?.trim().parse::<f64>().ok()?;
194    let y = line.get(38..46)?.trim().parse::<f64>().ok()?;
195    let z = line.get(46..54)?.trim().parse::<f64>().ok()?;
196    let occupancy = line
197        .get(54..60)
198        .and_then(|s| s.trim().parse::<f64>().ok())
199        .unwrap_or(1.0);
200    let temp_factor = line
201        .get(60..66)
202        .and_then(|s| s.trim().parse::<f64>().ok())
203        .unwrap_or(0.0);
204    let element = line
205        .get(76..78)
206        .map(|s| s.trim().to_string())
207        .unwrap_or_default();
208
209    let mut atom = PdbAtom::new(
210        serial,
211        name,
212        res_name,
213        chain_id,
214        res_seq,
215        x,
216        y,
217        z,
218        occupancy,
219        temp_factor,
220        element,
221    );
222    atom.is_hetatm = is_hetatm;
223    Some(atom)
224}
225
226/// Parse a CONECT record line.
227#[allow(dead_code)]
228pub fn parse_conect_line(line: &str) -> Option<ConectRecord> {
229    if !line.starts_with("CONECT") {
230        return None;
231    }
232    let parts: Vec<u32> = line[6..]
233        .split_whitespace()
234        .filter_map(|s| s.parse::<u32>().ok())
235        .collect();
236    if parts.is_empty() {
237        return None;
238    }
239    Some(ConectRecord {
240        atom_serial: parts[0],
241        bonded_atoms: parts[1..].to_vec(),
242    })
243}
244
245/// Parse a CRYST1 record line.
246#[allow(dead_code)]
247pub fn parse_cryst1_line(line: &str) -> Option<Cryst1Record> {
248    if !line.starts_with("CRYST1") {
249        return None;
250    }
251    if line.len() < 54 {
252        return None;
253    }
254    let a = line.get(6..15)?.trim().parse::<f64>().ok()?;
255    let b = line.get(15..24)?.trim().parse::<f64>().ok()?;
256    let c = line.get(24..33)?.trim().parse::<f64>().ok()?;
257    let alpha = line.get(33..40)?.trim().parse::<f64>().ok()?;
258    let beta = line.get(40..47)?.trim().parse::<f64>().ok()?;
259    let gamma = line.get(47..54)?.trim().parse::<f64>().ok()?;
260    let space_group = line.get(55..66).unwrap_or("P 1").trim().to_string();
261    let z_value = line
262        .get(66..70)
263        .and_then(|s| s.trim().parse::<u32>().ok())
264        .unwrap_or(1);
265
266    Some(Cryst1Record {
267        a,
268        b,
269        c,
270        alpha,
271        beta,
272        gamma,
273        space_group,
274        z_value,
275    })
276}
277
278/// Parse PDB file content (as a `&str`) into a [`PdbModel`].
279///
280/// Handles TITLE, REMARK, ATOM, HETATM, CONECT, and CRYST1 records.
281#[allow(dead_code)]
282pub fn parse_pdb(content: &str) -> Result<PdbModel> {
283    let mut atoms = Vec::new();
284    let mut remarks = Vec::new();
285    let mut title = String::new();
286    let mut conect_records = Vec::new();
287    let mut cryst1 = None;
288
289    for line in content.lines() {
290        if line.starts_with("TITLE") {
291            title = line.get(10..).unwrap_or("").trim().to_string();
292        } else if line.starts_with("REMARK") {
293            remarks.push(line.get(6..).unwrap_or("").trim().to_string());
294        } else if line.starts_with("ATOM  ") || line.starts_with("HETATM") {
295            match parse_atom_line(line) {
296                Some(atom) => atoms.push(atom),
297                None => return Err(Error::Parse(format!("Failed to parse atom line: {line}"))),
298            }
299        } else if line.starts_with("CONECT") {
300            if let Some(conect) = parse_conect_line(line) {
301                conect_records.push(conect);
302            }
303        } else if line.starts_with("CRYST1") {
304            cryst1 = parse_cryst1_line(line);
305        }
306    }
307
308    Ok(PdbModel {
309        atoms,
310        remarks,
311        title,
312        conect_records,
313        cryst1,
314        model_number: None,
315    })
316}
317
318/// Parse a multi-model PDB file.
319#[allow(dead_code)]
320pub fn parse_multi_model_pdb(content: &str) -> Result<PdbFile> {
321    let mut models = Vec::new();
322    let mut file_remarks = Vec::new();
323    let mut file_title = String::new();
324    let mut file_cryst1 = None;
325
326    let mut current_atoms: Vec<PdbAtom> = Vec::new();
327    let mut current_model_num: Option<u32> = None;
328    let mut current_conect: Vec<ConectRecord> = Vec::new();
329    let mut in_model = false;
330
331    for line in content.lines() {
332        if line.starts_with("TITLE") {
333            file_title = line.get(10..).unwrap_or("").trim().to_string();
334        } else if line.starts_with("REMARK") {
335            file_remarks.push(line.get(6..).unwrap_or("").trim().to_string());
336        } else if line.starts_with("CRYST1") {
337            file_cryst1 = parse_cryst1_line(line);
338        } else if line.starts_with("MODEL") {
339            in_model = true;
340            current_model_num = line[5..].trim().parse::<u32>().ok();
341            current_atoms.clear();
342            current_conect.clear();
343        } else if line.starts_with("ENDMDL") {
344            models.push(PdbModel {
345                atoms: std::mem::take(&mut current_atoms),
346                remarks: Vec::new(),
347                title: String::new(),
348                conect_records: std::mem::take(&mut current_conect),
349                cryst1: file_cryst1.clone(),
350                model_number: current_model_num,
351            });
352            in_model = false;
353        } else if line.starts_with("ATOM  ") || line.starts_with("HETATM") {
354            if let Some(atom) = parse_atom_line(line) {
355                current_atoms.push(atom);
356            }
357        } else if line.starts_with("CONECT")
358            && let Some(conect) = parse_conect_line(line)
359        {
360            current_conect.push(conect);
361        }
362    }
363
364    // If no MODEL/ENDMDL was found, treat whole file as one model
365    if models.is_empty() && !current_atoms.is_empty() {
366        models.push(PdbModel {
367            atoms: current_atoms,
368            remarks: file_remarks.clone(),
369            title: file_title.clone(),
370            conect_records: current_conect,
371            cryst1: file_cryst1.clone(),
372            model_number: None,
373        });
374    } else if !in_model && !current_atoms.is_empty() {
375        // Atoms found after last ENDMDL
376        models.push(PdbModel {
377            atoms: current_atoms,
378            remarks: Vec::new(),
379            title: String::new(),
380            conect_records: current_conect,
381            cryst1: file_cryst1.clone(),
382            model_number: None,
383        });
384    }
385
386    Ok(PdbFile {
387        models,
388        remarks: file_remarks,
389        title: file_title,
390        cryst1: file_cryst1,
391    })
392}
393
394/// Write a [`PdbModel`] to a PDB fixed-column format string.
395#[allow(dead_code)]
396pub fn write_pdb(model: &PdbModel) -> String {
397    let mut out = String::new();
398
399    if !model.title.is_empty() {
400        out.push_str(&format!("TITLE     {}\n", model.title));
401    }
402    for remark in &model.remarks {
403        out.push_str(&format!("REMARK {remark}\n"));
404    }
405
406    if let Some(ref cryst) = model.cryst1 {
407        out.push_str(&format!(
408            "CRYST1{:9.3}{:9.3}{:9.3}{:7.2}{:7.2}{:7.2} {:<11}{:>4}\n",
409            cryst.a,
410            cryst.b,
411            cryst.c,
412            cryst.alpha,
413            cryst.beta,
414            cryst.gamma,
415            cryst.space_group,
416            cryst.z_value,
417        ));
418    }
419
420    for atom in &model.atoms {
421        let record_type = if atom.is_hetatm { "HETATM" } else { "ATOM  " };
422        let atom_name = if atom.name.len() < 4 {
423            format!(" {:<3}", atom.name)
424        } else {
425            format!("{:<4}", atom.name)
426        };
427        let element_field = if atom.element.is_empty() {
428            "  ".to_string()
429        } else {
430            format!("{:>2}", atom.element)
431        };
432        out.push_str(&format!(
433            "{}{:>5} {} {:>3} {}{:>4}    {:8.3}{:8.3}{:8.3}{:6.2}{:6.2}          {}\n",
434            record_type,
435            atom.serial,
436            atom_name,
437            atom.res_name,
438            atom.chain_id,
439            atom.res_seq,
440            atom.x,
441            atom.y,
442            atom.z,
443            atom.occupancy,
444            atom.temp_factor,
445            element_field,
446        ));
447    }
448
449    for conect in &model.conect_records {
450        let bonded: Vec<String> = conect
451            .bonded_atoms
452            .iter()
453            .map(|a| format!("{:>5}", a))
454            .collect();
455        out.push_str(&format!(
456            "CONECT{:>5}{}\n",
457            conect.atom_serial,
458            bonded.join("")
459        ));
460    }
461
462    out.push_str("END\n");
463    out
464}
465
466/// Write a multi-model PDB file.
467#[allow(dead_code)]
468pub fn write_multi_model_pdb(pdb_file: &PdbFile) -> String {
469    let mut out = String::new();
470
471    if !pdb_file.title.is_empty() {
472        out.push_str(&format!("TITLE     {}\n", pdb_file.title));
473    }
474    for remark in &pdb_file.remarks {
475        out.push_str(&format!("REMARK {remark}\n"));
476    }
477    if let Some(ref cryst) = pdb_file.cryst1 {
478        out.push_str(&format!(
479            "CRYST1{:9.3}{:9.3}{:9.3}{:7.2}{:7.2}{:7.2} {:<11}{:>4}\n",
480            cryst.a,
481            cryst.b,
482            cryst.c,
483            cryst.alpha,
484            cryst.beta,
485            cryst.gamma,
486            cryst.space_group,
487            cryst.z_value,
488        ));
489    }
490
491    for (i, model) in pdb_file.models.iter().enumerate() {
492        let model_num = model.model_number.unwrap_or((i + 1) as u32);
493        out.push_str(&format!("MODEL     {:>4}\n", model_num));
494
495        for atom in &model.atoms {
496            let record_type = if atom.is_hetatm { "HETATM" } else { "ATOM  " };
497            let atom_name = if atom.name.len() < 4 {
498                format!(" {:<3}", atom.name)
499            } else {
500                format!("{:<4}", atom.name)
501            };
502            let element_field = if atom.element.is_empty() {
503                "  ".to_string()
504            } else {
505                format!("{:>2}", atom.element)
506            };
507            out.push_str(&format!(
508                "{}{:>5} {} {:>3} {}{:>4}    {:8.3}{:8.3}{:8.3}{:6.2}{:6.2}          {}\n",
509                record_type,
510                atom.serial,
511                atom_name,
512                atom.res_name,
513                atom.chain_id,
514                atom.res_seq,
515                atom.x,
516                atom.y,
517                atom.z,
518                atom.occupancy,
519                atom.temp_factor,
520                element_field,
521            ));
522        }
523        for conect in &model.conect_records {
524            let bonded: Vec<String> = conect
525                .bonded_atoms
526                .iter()
527                .map(|a| format!("{:>5}", a))
528                .collect();
529            out.push_str(&format!(
530                "CONECT{:>5}{}\n",
531                conect.atom_serial,
532                bonded.join("")
533            ));
534        }
535        out.push_str("ENDMDL\n");
536    }
537    out.push_str("END\n");
538    out
539}
540
541/// Filter atoms by chain ID.
542#[allow(dead_code)]
543pub fn filter_by_chain(atoms: &[PdbAtom], chain: char) -> Vec<PdbAtom> {
544    atoms
545        .iter()
546        .filter(|a| a.chain_id == chain)
547        .cloned()
548        .collect()
549}
550
551/// Get all unique chain IDs from a list of atoms.
552#[allow(dead_code)]
553pub fn unique_chain_ids(atoms: &[PdbAtom]) -> Vec<char> {
554    let mut chains: Vec<char> = atoms.iter().map(|a| a.chain_id).collect();
555    chains.sort_unstable();
556    chains.dedup();
557    chains
558}
559
560/// Get all unique residue names from a list of atoms.
561#[allow(dead_code)]
562pub fn unique_residue_names(atoms: &[PdbAtom]) -> Vec<String> {
563    let mut names: Vec<String> = atoms.iter().map(|a| a.res_name.clone()).collect();
564    names.sort();
565    names.dedup();
566    names
567}
568
569/// Compute the center of mass of atoms (equal mass).
570#[allow(dead_code)]
571pub fn center_of_mass(atoms: &[PdbAtom]) -> [f64; 3] {
572    if atoms.is_empty() {
573        return [0.0; 3];
574    }
575    let n = atoms.len() as f64;
576    let mut com = [0.0; 3];
577    for a in atoms {
578        com[0] += a.x;
579        com[1] += a.y;
580        com[2] += a.z;
581    }
582    [com[0] / n, com[1] / n, com[2] / n]
583}
584
585// ---------------------------------------------------------------------------
586// Legacy file-based writer / reader (kept for backward compatibility)
587// ---------------------------------------------------------------------------
588
589/// Writer for PDB format files.
590pub struct PdbWriter;
591
592impl PdbWriter {
593    /// Write a single frame of atom data to a PDB file.
594    ///
595    /// Each atom is written as an ATOM record following PDB column conventions.
596    pub fn write_frame(path: &str, atoms: &[PdbAtom]) -> Result<()> {
597        let model = PdbModel {
598            atoms: atoms.to_vec(),
599            remarks: Vec::new(),
600            title: String::new(),
601            conect_records: Vec::new(),
602            cryst1: None,
603            model_number: None,
604        };
605        let content = write_pdb(&model);
606        let file = File::create(Path::new(path))?;
607        let mut w = BufWriter::new(file);
608        w.write_all(content.as_bytes())?;
609        w.flush()?;
610        Ok(())
611    }
612}
613
614/// Reader for PDB format files (basic ATOM records only).
615pub struct PdbReader;
616
617impl PdbReader {
618    /// Read ATOM records from a PDB file.
619    pub fn read(path: &str) -> Result<Vec<PdbAtom>> {
620        let file = File::open(Path::new(path))?;
621        let reader = BufReader::new(file);
622        let mut content = String::new();
623        for line in reader.lines() {
624            content.push_str(&line?);
625            content.push('\n');
626        }
627        let model = parse_pdb(&content)?;
628        Ok(model.atoms)
629    }
630}
631
632// ---------------------------------------------------------------------------
633// SEQRES Parsing
634// ---------------------------------------------------------------------------
635
636/// A SEQRES record describing the primary sequence of a chain.
637#[derive(Debug, Clone)]
638#[allow(dead_code)]
639pub struct SeqresRecord {
640    /// Chain identifier.
641    pub chain_id: char,
642    /// Total number of residues in the chain.
643    pub num_residues: u32,
644    /// List of 3-letter residue names.
645    pub residues: Vec<String>,
646}
647
648#[allow(dead_code)]
649impl SeqresRecord {
650    /// Return the sequence as a dash-separated string of 3-letter codes.
651    pub fn sequence_string(&self) -> String {
652        self.residues.join("-")
653    }
654
655    /// Return a 1-letter code sequence (standard amino acids only; unknown → '?').
656    pub fn one_letter_sequence(&self) -> String {
657        self.residues.iter().map(|r| aa3_to_1(r)).collect()
658    }
659}
660
661/// Parse all SEQRES records from PDB content.
662#[allow(dead_code)]
663pub fn parse_seqres(content: &str) -> Vec<SeqresRecord> {
664    // Group SEQRES lines by chain ID
665    let mut chains: std::collections::HashMap<char, (u32, Vec<String>)> =
666        std::collections::HashMap::new();
667
668    for line in content.lines() {
669        if !line.starts_with("SEQRES") {
670            continue;
671        }
672        if line.len() < 19 {
673            continue;
674        }
675
676        let chain_id = line.as_bytes().get(11).map_or('A', |&b| b as char);
677        let num_res: u32 = line
678            .get(13..17)
679            .and_then(|s| s.trim().parse().ok())
680            .unwrap_or(0);
681
682        // Residue names start at col 19, each is 4 chars (padded with space)
683        let residues_part = line.get(19..).unwrap_or("");
684        let residues: Vec<String> = residues_part
685            .split_whitespace()
686            .map(|s| s.to_string())
687            .collect();
688
689        let entry = chains.entry(chain_id).or_insert((0, Vec::new()));
690        if entry.0 == 0 {
691            entry.0 = num_res;
692        }
693        entry.1.extend(residues);
694    }
695
696    let mut records: Vec<SeqresRecord> = chains
697        .into_iter()
698        .map(|(chain_id, (num_residues, residues))| SeqresRecord {
699            chain_id,
700            num_residues,
701            residues,
702        })
703        .collect();
704    records.sort_by_key(|r| r.chain_id);
705    records
706}
707
708/// Convert 3-letter amino acid code to 1-letter code.
709#[allow(dead_code)]
710fn aa3_to_1(aa: &str) -> char {
711    match aa {
712        "ALA" => 'A',
713        "ARG" => 'R',
714        "ASN" => 'N',
715        "ASP" => 'D',
716        "CYS" => 'C',
717        "GLN" => 'Q',
718        "GLU" => 'E',
719        "GLY" => 'G',
720        "HIS" => 'H',
721        "ILE" => 'I',
722        "LEU" => 'L',
723        "LYS" => 'K',
724        "MET" => 'M',
725        "PHE" => 'F',
726        "PRO" => 'P',
727        "SER" => 'S',
728        "THR" => 'T',
729        "TRP" => 'W',
730        "TYR" => 'Y',
731        "VAL" => 'V',
732        _ => '?',
733    }
734}
735
736// ---------------------------------------------------------------------------
737// Secondary Structure Records
738// ---------------------------------------------------------------------------
739
740/// Type of secondary structure element.
741#[derive(Debug, Clone, Copy, PartialEq, Eq)]
742#[allow(dead_code)]
743pub enum SecStructType {
744    /// Alpha helix (HELIX record).
745    Helix,
746    /// Beta sheet (SHEET record).
747    Sheet,
748    /// Turn (TURN record).
749    Turn,
750}
751
752/// A secondary structure record (HELIX, SHEET, or TURN).
753#[derive(Debug, Clone)]
754#[allow(dead_code)]
755pub struct SecStructRecord {
756    /// Type of secondary structure.
757    pub record_type: SecStructType,
758    /// Chain identifier.
759    pub chain_id: char,
760    /// Starting residue sequence number.
761    pub start_res: u32,
762    /// Ending residue sequence number.
763    pub end_res: u32,
764    /// Starting residue name.
765    pub start_res_name: String,
766    /// Ending residue name.
767    pub end_res_name: String,
768}
769
770/// Parse HELIX, SHEET, and TURN records from PDB content.
771#[allow(dead_code)]
772pub fn parse_secondary_structure(content: &str) -> Vec<SecStructRecord> {
773    let mut records = Vec::new();
774
775    for line in content.lines() {
776        if line.starts_with("HELIX") {
777            if line.len() < 26 {
778                continue;
779            }
780            let chain_id = line.as_bytes().get(19).map_or('A', |&b| b as char);
781            let start_res_name = line.get(15..18).unwrap_or("   ").trim().to_string();
782            let start_res: u32 = line
783                .get(21..25)
784                .and_then(|s| s.trim().parse().ok())
785                .unwrap_or(0);
786            let end_res_name = line.get(27..30).unwrap_or("   ").trim().to_string();
787            let end_res: u32 = line
788                .get(33..37)
789                .and_then(|s| s.trim().parse().ok())
790                .unwrap_or(0);
791            records.push(SecStructRecord {
792                record_type: SecStructType::Helix,
793                chain_id,
794                start_res,
795                end_res,
796                start_res_name,
797                end_res_name,
798            });
799        } else if line.starts_with("SHEET") {
800            if line.len() < 22 {
801                continue;
802            }
803            let chain_id = line.as_bytes().get(21).map_or('A', |&b| b as char);
804            let start_res_name = line.get(17..20).unwrap_or("   ").trim().to_string();
805            let start_res: u32 = line
806                .get(22..26)
807                .and_then(|s| s.trim().parse().ok())
808                .unwrap_or(0);
809            let end_res_name = line.get(28..31).unwrap_or("   ").trim().to_string();
810            let end_res: u32 = line
811                .get(33..37)
812                .and_then(|s| s.trim().parse().ok())
813                .unwrap_or(0);
814            records.push(SecStructRecord {
815                record_type: SecStructType::Sheet,
816                chain_id,
817                start_res,
818                end_res,
819                start_res_name,
820                end_res_name,
821            });
822        } else if line.starts_with("TURN") {
823            if line.len() < 22 {
824                continue;
825            }
826            let chain_id = line.as_bytes().get(19).map_or('A', |&b| b as char);
827            let start_res_name = line.get(15..18).unwrap_or("   ").trim().to_string();
828            let start_res: u32 = line
829                .get(20..24)
830                .and_then(|s| s.trim().parse().ok())
831                .unwrap_or(0);
832            let end_res_name = line.get(26..29).unwrap_or("   ").trim().to_string();
833            let end_res: u32 = line
834                .get(30..34)
835                .and_then(|s| s.trim().parse().ok())
836                .unwrap_or(0);
837            records.push(SecStructRecord {
838                record_type: SecStructType::Turn,
839                chain_id,
840                start_res,
841                end_res,
842                start_res_name,
843                end_res_name,
844            });
845        }
846    }
847
848    records
849}
850
851// ---------------------------------------------------------------------------
852// Biological Assembly
853// ---------------------------------------------------------------------------
854
855/// A chain of atoms within a biological assembly.
856#[derive(Debug, Clone)]
857#[allow(dead_code)]
858pub struct AssemblyChain {
859    /// Chain identifier.
860    pub chain_id: char,
861    /// All atoms in this chain.
862    pub atoms: Vec<PdbAtom>,
863}
864
865/// A biological assembly: one or more copies of the asymmetric unit
866/// arranged by crystallographic symmetry.
867#[derive(Debug, Clone)]
868#[allow(dead_code)]
869pub struct BiologicalAssembly {
870    /// Assembly number (as in REMARK 350).
871    pub id: u32,
872    /// All chains in this assembly.
873    pub chains: Vec<AssemblyChain>,
874}
875
876#[allow(dead_code)]
877impl BiologicalAssembly {
878    /// Build a biological assembly from a `PdbModel` using the identity transform.
879    pub fn from_model_with_identity(model: &PdbModel, id: u32) -> Self {
880        // Group atoms by chain
881        let mut chain_map: std::collections::HashMap<char, Vec<PdbAtom>> =
882            std::collections::HashMap::new();
883        for atom in &model.atoms {
884            chain_map
885                .entry(atom.chain_id)
886                .or_default()
887                .push(atom.clone());
888        }
889        let mut chains: Vec<AssemblyChain> = chain_map
890            .into_iter()
891            .map(|(chain_id, atoms)| AssemblyChain { chain_id, atoms })
892            .collect();
893        chains.sort_by_key(|c| c.chain_id);
894        Self { id, chains }
895    }
896
897    /// Apply a 4×3 rotation-translation matrix to all atoms.
898    ///
899    /// Matrix format: `[[r00,r01,r02,tx\],[r10,r11,r12,ty],[r20,r21,r22,tz]]`
900    pub fn apply_transform(&self, m: &[[f64; 4]; 3]) -> Self {
901        let transform_atom = |atom: &PdbAtom| -> PdbAtom {
902            let x = m[0][0] * atom.x + m[0][1] * atom.y + m[0][2] * atom.z + m[0][3];
903            let y = m[1][0] * atom.x + m[1][1] * atom.y + m[1][2] * atom.z + m[1][3];
904            let z = m[2][0] * atom.x + m[2][1] * atom.y + m[2][2] * atom.z + m[2][3];
905            let mut a = atom.clone();
906            a.x = x;
907            a.y = y;
908            a.z = z;
909            a
910        };
911        let chains = self
912            .chains
913            .iter()
914            .map(|c| AssemblyChain {
915                chain_id: c.chain_id,
916                atoms: c.atoms.iter().map(transform_atom).collect(),
917            })
918            .collect();
919        Self {
920            id: self.id,
921            chains,
922        }
923    }
924
925    /// Total number of atoms across all chains.
926    pub fn total_atoms(&self) -> usize {
927        self.chains.iter().map(|c| c.atoms.len()).sum()
928    }
929
930    /// All chain IDs present.
931    pub fn chain_ids(&self) -> Vec<char> {
932        self.chains.iter().map(|c| c.chain_id).collect()
933    }
934}
935
936// ---------------------------------------------------------------------------
937// Symmetry Operations
938// ---------------------------------------------------------------------------
939
940/// A crystallographic symmetry operation: rotation + translation.
941///
942/// Stored as a 3×4 matrix: `[[r00,r01,r02,tx\],[r10,r11,r12,ty],[r20,r21,r22,tz]]`.
943#[derive(Debug, Clone)]
944#[allow(dead_code)]
945pub struct SymmetryOperation {
946    /// The 3×4 transformation matrix.
947    pub matrix: [[f64; 4]; 3],
948    /// Optional human-readable description (e.g., "X,Y,Z").
949    pub description: String,
950}
951
952#[allow(dead_code)]
953impl SymmetryOperation {
954    /// The identity operation.
955    pub fn identity() -> Self {
956        Self {
957            matrix: [
958                [1.0, 0.0, 0.0, 0.0],
959                [0.0, 1.0, 0.0, 0.0],
960                [0.0, 0.0, 1.0, 0.0],
961            ],
962            description: "X,Y,Z".to_string(),
963        }
964    }
965
966    /// A pure translation.
967    pub fn translation(t: [f64; 3]) -> Self {
968        Self {
969            matrix: [
970                [1.0, 0.0, 0.0, t[0]],
971                [0.0, 1.0, 0.0, t[1]],
972                [0.0, 0.0, 1.0, t[2]],
973            ],
974            description: format!("T({},{},{})", t[0], t[1], t[2]),
975        }
976    }
977
978    /// Apply the symmetry operation to a 3D point.
979    pub fn apply(&self, p: [f64; 3]) -> [f64; 3] {
980        let m = &self.matrix;
981        [
982            m[0][0] * p[0] + m[0][1] * p[1] + m[0][2] * p[2] + m[0][3],
983            m[1][0] * p[0] + m[1][1] * p[1] + m[1][2] * p[2] + m[1][3],
984            m[2][0] * p[0] + m[2][1] * p[1] + m[2][2] * p[2] + m[2][3],
985        ]
986    }
987
988    /// Apply the symmetry operation to a list of atoms.
989    pub fn apply_to_atoms(&self, atoms: &[PdbAtom]) -> Vec<PdbAtom> {
990        atoms
991            .iter()
992            .map(|a| {
993                let [x, y, z] = self.apply([a.x, a.y, a.z]);
994                let mut new_a = a.clone();
995                new_a.x = x;
996                new_a.y = y;
997                new_a.z = z;
998                new_a
999            })
1000            .collect()
1001    }
1002
1003    /// Generate all symmetry-equivalent copies of the atoms.
1004    pub fn generate_copies(ops: &[SymmetryOperation], atoms: &[PdbAtom]) -> Vec<Vec<PdbAtom>> {
1005        ops.iter().map(|op| op.apply_to_atoms(atoms)).collect()
1006    }
1007}
1008
1009/// Parse REMARK 290 crystallographic symmetry operations from PDB content.
1010#[allow(dead_code)]
1011pub fn parse_symmetry_operations(content: &str) -> Vec<SymmetryOperation> {
1012    let mut ops = Vec::new();
1013    let mut in_remark_290 = false;
1014    let mut current_matrix: Vec<[f64; 4]> = Vec::new();
1015
1016    for line in content.lines() {
1017        if line.starts_with("REMARK 290") {
1018            in_remark_290 = true;
1019            // Look for SMTRY lines
1020            if line.contains("SMTRY") {
1021                let parts: Vec<&str> = line.split_whitespace().collect();
1022                if parts.len() >= 8
1023                    && let (Ok(r1), Ok(r2), Ok(r3), Ok(t)) = (
1024                        parts[4].parse::<f64>(),
1025                        parts[5].parse::<f64>(),
1026                        parts[6].parse::<f64>(),
1027                        parts[7].parse::<f64>(),
1028                    )
1029                {
1030                    current_matrix.push([r1, r2, r3, t]);
1031                    if current_matrix.len() == 3 {
1032                        ops.push(SymmetryOperation {
1033                            matrix: [current_matrix[0], current_matrix[1], current_matrix[2]],
1034                            description: String::new(),
1035                        });
1036                        current_matrix.clear();
1037                    }
1038                }
1039            }
1040        } else if in_remark_290 && !line.starts_with("REMARK") {
1041            in_remark_290 = false;
1042        }
1043    }
1044
1045    // Always include identity if no ops found
1046    if ops.is_empty() {
1047        ops.push(SymmetryOperation::identity());
1048    }
1049    ops
1050}
1051
1052// ---------------------------------------------------------------------------
1053// PDB Validation
1054// ---------------------------------------------------------------------------
1055
1056/// Result of validating a PDB model.
1057#[derive(Debug, Clone, Default)]
1058#[allow(dead_code)]
1059pub struct PdbValidationResult {
1060    /// Fatal errors (model should not be used as-is).
1061    pub errors: Vec<String>,
1062    /// Warnings (model is usable but has issues).
1063    pub warnings: Vec<String>,
1064}
1065
1066#[allow(dead_code)]
1067impl PdbValidationResult {
1068    /// Whether validation passed with no errors.
1069    pub fn is_valid(&self) -> bool {
1070        self.errors.is_empty()
1071    }
1072
1073    /// Whether there are any issues (errors or warnings).
1074    pub fn has_issues(&self) -> bool {
1075        !self.errors.is_empty() || !self.warnings.is_empty()
1076    }
1077}
1078
1079/// Validate a `PdbModel` for common errors and warnings.
1080///
1081/// Checks:
1082/// - Duplicate atom serial numbers (error)
1083/// - Atoms with extreme coordinates (warning)
1084/// - Missing occupancy (warning)
1085/// - Empty model (warning)
1086#[allow(dead_code)]
1087pub fn validate_pdb_model(model: &PdbModel) -> PdbValidationResult {
1088    let mut result = PdbValidationResult::default();
1089
1090    if model.atoms.is_empty() {
1091        result.warnings.push("model has no atoms".to_string());
1092        return result;
1093    }
1094
1095    // Check duplicate serial numbers
1096    let mut seen_serials = std::collections::HashSet::new();
1097    for atom in &model.atoms {
1098        if !seen_serials.insert(atom.serial) {
1099            result
1100                .errors
1101                .push(format!("duplicate atom serial number: {}", atom.serial));
1102        }
1103    }
1104
1105    // Check extreme coordinates
1106    for atom in &model.atoms {
1107        let max_coord = atom.x.abs().max(atom.y.abs()).max(atom.z.abs());
1108        if max_coord > 9999.0 {
1109            result.warnings.push(format!(
1110                "atom {} has extreme coordinate ({:.1}, {:.1}, {:.1})",
1111                atom.serial, atom.x, atom.y, atom.z
1112            ));
1113        }
1114    }
1115
1116    // Check occupancy values
1117    for atom in &model.atoms {
1118        if atom.occupancy < 0.0 || atom.occupancy > 1.0 {
1119            result.warnings.push(format!(
1120                "atom {} has out-of-range occupancy: {}",
1121                atom.serial, atom.occupancy
1122            ));
1123        }
1124    }
1125
1126    // Check B-factors
1127    for atom in &model.atoms {
1128        if atom.temp_factor < 0.0 {
1129            result.warnings.push(format!(
1130                "atom {} has negative B-factor: {}",
1131                atom.serial, atom.temp_factor
1132            ));
1133        }
1134    }
1135
1136    result
1137}
1138
1139/// Validate a full multi-model PDB file.
1140#[allow(dead_code)]
1141pub fn validate_pdb_file(pdb_file: &PdbFile) -> Vec<PdbValidationResult> {
1142    pdb_file.models.iter().map(validate_pdb_model).collect()
1143}
1144
1145#[cfg(test)]
1146mod tests {
1147    use super::*;
1148
1149    fn make_atom(
1150        serial: u32,
1151        name: &str,
1152        res: &str,
1153        chain: char,
1154        x: f64,
1155        y: f64,
1156        z: f64,
1157    ) -> PdbAtom {
1158        PdbAtom::new(
1159            serial,
1160            name.to_string(),
1161            res.to_string(),
1162            chain,
1163            serial,
1164            x,
1165            y,
1166            z,
1167            1.0,
1168            0.0,
1169            String::new(),
1170        )
1171    }
1172
1173    #[test]
1174    fn test_pdb_write_and_read_roundtrip() {
1175        let path = "/tmp/oxiphy_test.pdb";
1176        let atoms = vec![
1177            make_atom(1, "CA", "ALA", 'A', 1.234, 5.678, 9.012),
1178            make_atom(2, "N", "GLY", 'A', -1.0, 2.5, 3.75),
1179        ];
1180        PdbWriter::write_frame(path, &atoms).unwrap();
1181        let read_atoms = PdbReader::read(path).unwrap();
1182        assert_eq!(read_atoms.len(), 2);
1183        assert!((read_atoms[0].x - 1.234).abs() < 0.01);
1184        assert!((read_atoms[1].z - 3.75).abs() < 0.01);
1185        assert_eq!(read_atoms[0].name, "CA");
1186        assert_eq!(read_atoms[1].res_name, "GLY");
1187        std::fs::remove_file(path).ok();
1188    }
1189
1190    #[test]
1191    fn test_pdb_write_read_roundtrip() {
1192        let path = "/tmp/oxiphy_test_rt3.pdb";
1193        let atoms = vec![
1194            make_atom(1, "N", "ALA", 'A', 10.111, 20.222, 30.333),
1195            make_atom(2, "CA", "ALA", 'A', -5.555, 0.001, 15.999),
1196            make_atom(3, "C", "GLY", 'B', 0.0, 0.0, 0.0),
1197        ];
1198        PdbWriter::write_frame(path, &atoms).unwrap();
1199        let read_atoms = PdbReader::read(path).unwrap();
1200
1201        assert_eq!(read_atoms.len(), 3, "expected 3 atoms");
1202
1203        assert!((read_atoms[0].x - 10.111).abs() < 0.001);
1204        assert!((read_atoms[0].y - 20.222).abs() < 0.001);
1205        assert!((read_atoms[0].z - 30.333).abs() < 0.001);
1206        assert!((read_atoms[1].x - (-5.555)).abs() < 0.001);
1207        assert!((read_atoms[1].y - 0.001).abs() < 0.001);
1208        assert!((read_atoms[1].z - 15.999).abs() < 0.001);
1209        assert!((read_atoms[2].x - 0.0).abs() < 0.001);
1210        assert!((read_atoms[2].y - 0.0).abs() < 0.001);
1211        assert!((read_atoms[2].z - 0.0).abs() < 0.001);
1212
1213        std::fs::remove_file(path).ok();
1214    }
1215
1216    #[test]
1217    fn test_pdb_column_widths() {
1218        let path = "/tmp/oxiphy_test_cols.pdb";
1219        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)];
1220        PdbWriter::write_frame(path, &atoms).unwrap();
1221        let content = std::fs::read_to_string(path).unwrap();
1222        let line = content.lines().next().unwrap();
1223        assert!(line.starts_with("ATOM  "));
1224        let x_str = &line[30..38];
1225        assert!(x_str.trim().parse::<f64>().is_ok());
1226        std::fs::remove_file(path).ok();
1227    }
1228
1229    #[test]
1230    fn test_parse_pdb_minimal() {
1231        let pdb_str = "\
1232TITLE     Test molecule
1233REMARK Test remark
1234ATOM      1  CA  ALA A   1       1.234   5.678   9.012  1.00  0.00           C
1235ATOM      2  N   GLY A   2      -1.000   2.500   3.750  1.00  0.00           N
1236END
1237";
1238        let model = parse_pdb(pdb_str).unwrap();
1239        assert_eq!(model.atoms.len(), 2);
1240        assert_eq!(model.title, "Test molecule");
1241        assert_eq!(model.remarks.len(), 1);
1242
1243        let a0 = &model.atoms[0];
1244        assert_eq!(a0.name, "CA");
1245        assert_eq!(a0.res_name, "ALA");
1246        assert!((a0.x - 1.234).abs() < 0.001);
1247        assert!((a0.y - 5.678).abs() < 0.001);
1248        assert!((a0.z - 9.012).abs() < 0.001);
1249
1250        let a1 = &model.atoms[1];
1251        assert_eq!(a1.name, "N");
1252        assert!((a1.x - (-1.0)).abs() < 0.001);
1253        assert!((a1.z - 3.75).abs() < 0.001);
1254    }
1255
1256    #[test]
1257    fn test_parse_write_parse_roundtrip() {
1258        let pdb_str = "\
1259TITLE     Roundtrip test
1260ATOM      1  CA  ALA A   1      10.000  20.000  30.000  1.00  5.00
1261ATOM      2  N   GLY B   2      -5.000   0.500  15.000  0.50 10.00
1262END
1263";
1264        let model1 = parse_pdb(pdb_str).unwrap();
1265        let written = write_pdb(&model1);
1266        let model2 = parse_pdb(&written).unwrap();
1267
1268        assert_eq!(model1.atoms.len(), model2.atoms.len());
1269        for (a, b) in model1.atoms.iter().zip(model2.atoms.iter()) {
1270            assert!((a.x - b.x).abs() < 0.001);
1271            assert!((a.y - b.y).abs() < 0.001);
1272            assert!((a.z - b.z).abs() < 0.001);
1273        }
1274    }
1275
1276    // ─── New tests ────────────────────────────────────────────────────
1277
1278    #[test]
1279    fn test_hetatm_parsing() {
1280        let pdb_str = "\
1281HETATM    1  O   HOH A   1       1.000   2.000   3.000  1.00  0.00           O
1282END
1283";
1284        let model = parse_pdb(pdb_str).unwrap();
1285        assert_eq!(model.atoms.len(), 1);
1286        assert!(model.atoms[0].is_hetatm);
1287        assert_eq!(model.atoms[0].res_name, "HOH");
1288    }
1289
1290    #[test]
1291    fn test_hetatm_write_roundtrip() {
1292        let mut atom = make_atom(1, "O", "HOH", 'A', 1.0, 2.0, 3.0);
1293        atom.is_hetatm = true;
1294        let model = PdbModel {
1295            atoms: vec![atom],
1296            remarks: Vec::new(),
1297            title: String::new(),
1298            conect_records: Vec::new(),
1299            cryst1: None,
1300            model_number: None,
1301        };
1302        let written = write_pdb(&model);
1303        assert!(written.contains("HETATM"));
1304        let parsed = parse_pdb(&written).unwrap();
1305        assert!(parsed.atoms[0].is_hetatm);
1306    }
1307
1308    #[test]
1309    fn test_conect_record_parsing() {
1310        let pdb_str = "\
1311ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00           C
1312ATOM      2  N   ALA A   1       4.000   5.000   6.000  1.00  0.00           N
1313CONECT    1    2
1314END
1315";
1316        let model = parse_pdb(pdb_str).unwrap();
1317        assert_eq!(model.conect_records.len(), 1);
1318        assert_eq!(model.conect_records[0].atom_serial, 1);
1319        assert_eq!(model.conect_records[0].bonded_atoms, vec![2]);
1320    }
1321
1322    #[test]
1323    fn test_conect_multiple_bonds() {
1324        let line = "CONECT    1    2    3    4";
1325        let conect = parse_conect_line(line).unwrap();
1326        assert_eq!(conect.atom_serial, 1);
1327        assert_eq!(conect.bonded_atoms, vec![2, 3, 4]);
1328    }
1329
1330    #[test]
1331    fn test_conect_write_roundtrip() {
1332        let model = PdbModel {
1333            atoms: vec![
1334                make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1335                make_atom(2, "N", "ALA", 'A', 2.0, 0.0, 0.0),
1336            ],
1337            remarks: Vec::new(),
1338            title: String::new(),
1339            conect_records: vec![ConectRecord {
1340                atom_serial: 1,
1341                bonded_atoms: vec![2],
1342            }],
1343            cryst1: None,
1344            model_number: None,
1345        };
1346        let written = write_pdb(&model);
1347        assert!(written.contains("CONECT"));
1348        let parsed = parse_pdb(&written).unwrap();
1349        assert_eq!(parsed.conect_records.len(), 1);
1350        assert_eq!(parsed.conect_records[0].atom_serial, 1);
1351    }
1352
1353    #[test]
1354    fn test_cryst1_parsing() {
1355        let pdb_str = "\
1356CRYST1   50.000   60.000   70.000  90.00  90.00  90.00 P 1           1
1357ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1358END
1359";
1360        let model = parse_pdb(pdb_str).unwrap();
1361        let cryst = model.cryst1.unwrap();
1362        assert!((cryst.a - 50.0).abs() < 0.01);
1363        assert!((cryst.b - 60.0).abs() < 0.01);
1364        assert!((cryst.c - 70.0).abs() < 0.01);
1365        assert!((cryst.alpha - 90.0).abs() < 0.01);
1366    }
1367
1368    #[test]
1369    fn test_cryst1_write_roundtrip() {
1370        let model = PdbModel {
1371            atoms: vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)],
1372            remarks: Vec::new(),
1373            title: String::new(),
1374            conect_records: Vec::new(),
1375            cryst1: Some(Cryst1Record {
1376                a: 50.0,
1377                b: 60.0,
1378                c: 70.0,
1379                alpha: 90.0,
1380                beta: 90.0,
1381                gamma: 90.0,
1382                space_group: "P 1".to_string(),
1383                z_value: 1,
1384            }),
1385            model_number: None,
1386        };
1387        let written = write_pdb(&model);
1388        assert!(written.contains("CRYST1"));
1389        let parsed = parse_pdb(&written).unwrap();
1390        let cryst = parsed.cryst1.unwrap();
1391        assert!((cryst.a - 50.0).abs() < 0.01);
1392    }
1393
1394    #[test]
1395    fn test_multi_model_pdb() {
1396        let pdb_str = "\
1397TITLE     Multi model test
1398MODEL        1
1399ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1400ENDMDL
1401MODEL        2
1402ATOM      1  CA  ALA A   1       4.000   5.000   6.000  1.00  0.00
1403ENDMDL
1404END
1405";
1406        let pdb_file = parse_multi_model_pdb(pdb_str).unwrap();
1407        assert_eq!(pdb_file.models.len(), 2);
1408        assert_eq!(pdb_file.title, "Multi model test");
1409        assert!((pdb_file.models[0].atoms[0].x - 1.0).abs() < 0.01);
1410        assert!((pdb_file.models[1].atoms[0].x - 4.0).abs() < 0.01);
1411        assert_eq!(pdb_file.models[0].model_number, Some(1));
1412        assert_eq!(pdb_file.models[1].model_number, Some(2));
1413    }
1414
1415    #[test]
1416    fn test_multi_model_write_roundtrip() {
1417        let pdb_file = PdbFile {
1418            models: vec![
1419                PdbModel {
1420                    atoms: vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)],
1421                    remarks: Vec::new(),
1422                    title: String::new(),
1423                    conect_records: Vec::new(),
1424                    cryst1: None,
1425                    model_number: Some(1),
1426                },
1427                PdbModel {
1428                    atoms: vec![make_atom(1, "CA", "ALA", 'A', 4.0, 5.0, 6.0)],
1429                    remarks: Vec::new(),
1430                    title: String::new(),
1431                    conect_records: Vec::new(),
1432                    cryst1: None,
1433                    model_number: Some(2),
1434                },
1435            ],
1436            remarks: Vec::new(),
1437            title: "Test".to_string(),
1438            cryst1: None,
1439        };
1440        let written = write_multi_model_pdb(&pdb_file);
1441        assert!(written.contains("MODEL"));
1442        assert!(written.contains("ENDMDL"));
1443        let parsed = parse_multi_model_pdb(&written).unwrap();
1444        assert_eq!(parsed.models.len(), 2);
1445    }
1446
1447    #[test]
1448    fn test_single_model_as_multi() {
1449        let pdb_str = "\
1450ATOM      1  CA  ALA A   1       1.000   2.000   3.000  1.00  0.00
1451END
1452";
1453        let pdb_file = parse_multi_model_pdb(pdb_str).unwrap();
1454        assert_eq!(pdb_file.models.len(), 1);
1455    }
1456
1457    #[test]
1458    fn test_occupancy_and_bfactor() {
1459        let pdb_str = "\
1460ATOM      1  CA  ALA A   1       1.000   2.000   3.000  0.75 25.00
1461END
1462";
1463        let model = parse_pdb(pdb_str).unwrap();
1464        assert!((model.atoms[0].occupancy - 0.75).abs() < 0.01);
1465        assert!((model.atoms[0].temp_factor - 25.0).abs() < 0.01);
1466    }
1467
1468    #[test]
1469    fn test_occupancy_bfactor_roundtrip() {
1470        let atom = PdbAtom::new(
1471            1,
1472            "CA".into(),
1473            "ALA".into(),
1474            'A',
1475            1,
1476            1.0,
1477            2.0,
1478            3.0,
1479            0.50,
1480            30.0,
1481            "C".into(),
1482        );
1483        let model = PdbModel {
1484            atoms: vec![atom],
1485            remarks: Vec::new(),
1486            title: String::new(),
1487            conect_records: Vec::new(),
1488            cryst1: None,
1489            model_number: None,
1490        };
1491        let written = write_pdb(&model);
1492        let parsed = parse_pdb(&written).unwrap();
1493        assert!((parsed.atoms[0].occupancy - 0.50).abs() < 0.01);
1494        assert!((parsed.atoms[0].temp_factor - 30.0).abs() < 0.01);
1495    }
1496
1497    #[test]
1498    fn test_filter_by_chain() {
1499        let atoms = vec![
1500            make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1501            make_atom(2, "CA", "GLY", 'B', 2.0, 0.0, 0.0),
1502            make_atom(3, "N", "ALA", 'A', 3.0, 0.0, 0.0),
1503        ];
1504        let chain_a = filter_by_chain(&atoms, 'A');
1505        assert_eq!(chain_a.len(), 2);
1506        let chain_b = filter_by_chain(&atoms, 'B');
1507        assert_eq!(chain_b.len(), 1);
1508        let chain_c = filter_by_chain(&atoms, 'C');
1509        assert!(chain_c.is_empty());
1510    }
1511
1512    #[test]
1513    fn test_unique_chain_ids() {
1514        let atoms = vec![
1515            make_atom(1, "CA", "ALA", 'B', 0.0, 0.0, 0.0),
1516            make_atom(2, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1517            make_atom(3, "CA", "ALA", 'B', 0.0, 0.0, 0.0),
1518        ];
1519        let chains = unique_chain_ids(&atoms);
1520        assert_eq!(chains, vec!['A', 'B']);
1521    }
1522
1523    #[test]
1524    fn test_unique_residue_names() {
1525        let atoms = vec![
1526            make_atom(1, "CA", "GLY", 'A', 0.0, 0.0, 0.0),
1527            make_atom(2, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1528            make_atom(3, "CA", "GLY", 'A', 0.0, 0.0, 0.0),
1529        ];
1530        let names = unique_residue_names(&atoms);
1531        assert_eq!(names, vec!["ALA", "GLY"]);
1532    }
1533
1534    #[test]
1535    fn test_center_of_mass() {
1536        let atoms = vec![
1537            make_atom(1, "CA", "ALA", 'A', 0.0, 0.0, 0.0),
1538            make_atom(2, "CA", "ALA", 'A', 10.0, 0.0, 0.0),
1539        ];
1540        let com = center_of_mass(&atoms);
1541        assert!((com[0] - 5.0).abs() < 1e-10);
1542        assert!(com[1].abs() < 1e-10);
1543    }
1544
1545    #[test]
1546    fn test_center_of_mass_empty() {
1547        let com = center_of_mass(&[]);
1548        assert_eq!(com, [0.0, 0.0, 0.0]);
1549    }
1550
1551    #[test]
1552    fn test_atom_distance() {
1553        let a = make_atom(1, "CA", "ALA", 'A', 0.0, 0.0, 0.0);
1554        let b = make_atom(2, "CA", "ALA", 'A', 3.0, 4.0, 0.0);
1555        assert!((a.distance_to(&b) - 5.0).abs() < 1e-10);
1556    }
1557
1558    #[test]
1559    fn test_cryst1_default() {
1560        let cryst = Cryst1Record::default();
1561        assert!((cryst.a - 1.0).abs() < 1e-10);
1562        assert!((cryst.alpha - 90.0).abs() < 1e-10);
1563        assert_eq!(cryst.space_group, "P 1");
1564    }
1565
1566    #[test]
1567    fn test_conect_not_conect() {
1568        assert!(parse_conect_line("ATOM  stuff").is_none());
1569    }
1570
1571    #[test]
1572    fn test_cryst1_not_cryst1() {
1573        assert!(parse_cryst1_line("ATOM  stuff").is_none());
1574    }
1575
1576    #[test]
1577    fn test_parse_atom_line_too_short() {
1578        assert!(parse_atom_line("ATOM  short").is_none());
1579    }
1580
1581    // ── SEQRES tests ─────────────────────────────────────────────────────
1582
1583    #[test]
1584    fn test_seqres_parsing_basic() {
1585        let pdb_str = "\
1586SEQRES   1 A   10  ALA GLY VAL LEU ILE PRO PHE TRP MET SER
1587END
1588";
1589        let records = parse_seqres(pdb_str);
1590        assert_eq!(records.len(), 1);
1591        assert_eq!(records[0].chain_id, 'A');
1592        assert_eq!(records[0].num_residues, 10);
1593        assert_eq!(records[0].residues.len(), 10);
1594        assert_eq!(records[0].residues[0], "ALA");
1595    }
1596
1597    #[test]
1598    fn test_seqres_multi_chain() {
1599        let pdb_str = "\
1600SEQRES   1 A    5  ALA GLY VAL LEU ILE
1601SEQRES   1 B    3  TRP MET SER
1602END
1603";
1604        let records = parse_seqres(pdb_str);
1605        assert_eq!(records.len(), 2);
1606        let chains: Vec<char> = records.iter().map(|r| r.chain_id).collect();
1607        assert!(chains.contains(&'A'));
1608        assert!(chains.contains(&'B'));
1609    }
1610
1611    #[test]
1612    fn test_seqres_sequence_string() {
1613        let pdb_str = "\
1614SEQRES   1 A    3  ALA GLY VAL
1615END
1616";
1617        let records = parse_seqres(pdb_str);
1618        let seq = records[0].sequence_string();
1619        assert_eq!(seq, "ALA-GLY-VAL");
1620    }
1621
1622    // ── Secondary structure tests ──────────────────────────────────────
1623
1624    #[test]
1625    fn test_helix_record_parsing() {
1626        let pdb_str = "\
1627HELIX    1   1 ALA A    1  GLY A   10  1                                  10
1628END
1629";
1630        let records = parse_secondary_structure(pdb_str);
1631        let helices: Vec<_> = records
1632            .iter()
1633            .filter(|r| r.record_type == SecStructType::Helix)
1634            .collect();
1635        assert_eq!(helices.len(), 1);
1636        assert_eq!(helices[0].chain_id, 'A');
1637        assert_eq!(helices[0].start_res, 1);
1638        assert_eq!(helices[0].end_res, 10);
1639    }
1640
1641    #[test]
1642    fn test_sheet_record_parsing() {
1643        let pdb_str = "\
1644SHEET    1   A 2 ALA A   1  GLY A   5  0
1645END
1646";
1647        let records = parse_secondary_structure(pdb_str);
1648        let sheets: Vec<_> = records
1649            .iter()
1650            .filter(|r| r.record_type == SecStructType::Sheet)
1651            .collect();
1652        assert_eq!(sheets.len(), 1);
1653    }
1654
1655    #[test]
1656    fn test_turn_record_parsing() {
1657        let pdb_str = "\
1658TURN     1 T1  ALA A    1  GLY A    4
1659END
1660";
1661        let records = parse_secondary_structure(pdb_str);
1662        let turns: Vec<_> = records
1663            .iter()
1664            .filter(|r| r.record_type == SecStructType::Turn)
1665            .collect();
1666        assert_eq!(turns.len(), 1);
1667    }
1668
1669    // ── Biological assembly tests ──────────────────────────────────────
1670
1671    #[test]
1672    fn test_biological_assembly_construction() {
1673        let atoms = vec![
1674            make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0),
1675            make_atom(2, "CA", "GLY", 'A', 2.0, 0.0, 0.0),
1676        ];
1677        let model = PdbModel {
1678            atoms: atoms.clone(),
1679            remarks: Vec::new(),
1680            title: String::new(),
1681            conect_records: Vec::new(),
1682            cryst1: None,
1683            model_number: None,
1684        };
1685        let assembly = BiologicalAssembly::from_model_with_identity(&model, 1);
1686        assert_eq!(assembly.id, 1);
1687        assert_eq!(assembly.chains.len(), 1); // chain 'A'
1688        assert_eq!(assembly.chains[0].atoms.len(), 2);
1689    }
1690
1691    #[test]
1692    fn test_biological_assembly_apply_transform() {
1693        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0)];
1694        let model = PdbModel {
1695            atoms,
1696            remarks: Vec::new(),
1697            title: String::new(),
1698            conect_records: Vec::new(),
1699            cryst1: None,
1700            model_number: None,
1701        };
1702        let assembly = BiologicalAssembly::from_model_with_identity(&model, 1);
1703        // Apply translation [1,0,0] → atom moves to [2,0,0]
1704        let transform = [
1705            [1.0, 0.0, 0.0, 1.0],
1706            [0.0, 1.0, 0.0, 0.0],
1707            [0.0, 0.0, 1.0, 0.0],
1708        ];
1709        let transformed = assembly.apply_transform(&transform);
1710        assert!((transformed.chains[0].atoms[0].x - 2.0).abs() < 1e-6);
1711    }
1712
1713    // ── Symmetry operations tests ──────────────────────────────────────
1714
1715    #[test]
1716    fn test_symmetry_operation_identity() {
1717        let sym = SymmetryOperation::identity();
1718        let pt = [1.0, 2.0, 3.0];
1719        let result = sym.apply(pt);
1720        assert!((result[0] - 1.0).abs() < 1e-10);
1721        assert!((result[1] - 2.0).abs() < 1e-10);
1722        assert!((result[2] - 3.0).abs() < 1e-10);
1723    }
1724
1725    #[test]
1726    fn test_symmetry_operation_translation() {
1727        let sym = SymmetryOperation::translation([5.0, 0.0, 0.0]);
1728        let pt = [1.0, 2.0, 3.0];
1729        let result = sym.apply(pt);
1730        assert!((result[0] - 6.0).abs() < 1e-10);
1731        assert!((result[1] - 2.0).abs() < 1e-10);
1732        assert!((result[2] - 3.0).abs() < 1e-10);
1733    }
1734
1735    #[test]
1736    fn test_symmetry_apply_to_atoms() {
1737        let atoms = vec![make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0)];
1738        let sym = SymmetryOperation::translation([10.0, 0.0, 0.0]);
1739        let moved = sym.apply_to_atoms(&atoms);
1740        assert!((moved[0].x - 11.0).abs() < 1e-6);
1741    }
1742
1743    // ── PDB validation tests ───────────────────────────────────────────
1744
1745    #[test]
1746    fn test_pdb_validate_valid_model() {
1747        let atoms = vec![
1748            make_atom(1, "CA", "ALA", 'A', 1.0, 2.0, 3.0),
1749            make_atom(2, "N", "ALA", 'A', 2.0, 3.0, 4.0),
1750        ];
1751        let model = PdbModel {
1752            atoms,
1753            remarks: Vec::new(),
1754            title: "Test".to_string(),
1755            conect_records: Vec::new(),
1756            cryst1: None,
1757            model_number: None,
1758        };
1759        let result = validate_pdb_model(&model);
1760        assert!(result.is_valid(), "valid model should pass validation");
1761    }
1762
1763    #[test]
1764    fn test_pdb_validate_duplicate_serial() {
1765        let mut a1 = make_atom(1, "CA", "ALA", 'A', 1.0, 0.0, 0.0);
1766        let mut a2 = make_atom(1, "N", "ALA", 'A', 2.0, 0.0, 0.0); // duplicate serial
1767        a1.serial = 1;
1768        a2.serial = 1;
1769        let model = PdbModel {
1770            atoms: vec![a1, a2],
1771            remarks: Vec::new(),
1772            title: String::new(),
1773            conect_records: Vec::new(),
1774            cryst1: None,
1775            model_number: None,
1776        };
1777        let result = validate_pdb_model(&model);
1778        assert!(
1779            !result.is_valid(),
1780            "duplicate serials should fail validation"
1781        );
1782        assert!(!result.errors.is_empty());
1783    }
1784
1785    #[test]
1786    fn test_pdb_validate_extreme_coordinates() {
1787        let atom = PdbAtom {
1788            serial: 1,
1789            name: "CA".to_string(),
1790            res_name: "ALA".to_string(),
1791            chain_id: 'A',
1792            res_seq: 1,
1793            x: 1e10, // unreasonably large
1794            y: 0.0,
1795            z: 0.0,
1796            occupancy: 1.0,
1797            temp_factor: 0.0,
1798            element: "C".to_string(),
1799            is_hetatm: false,
1800        };
1801        let model = PdbModel {
1802            atoms: vec![atom],
1803            remarks: Vec::new(),
1804            title: String::new(),
1805            conect_records: Vec::new(),
1806            cryst1: None,
1807            model_number: None,
1808        };
1809        let result = validate_pdb_model(&model);
1810        assert!(
1811            result.warnings.iter().any(|w| w.contains("coordinate")),
1812            "extreme coordinate should generate a warning"
1813        );
1814    }
1815
1816    #[test]
1817    fn test_pdb_validate_empty_model() {
1818        let model = PdbModel {
1819            atoms: Vec::new(),
1820            remarks: Vec::new(),
1821            title: String::new(),
1822            conect_records: Vec::new(),
1823            cryst1: None,
1824            model_number: None,
1825        };
1826        let result = validate_pdb_model(&model);
1827        assert!(
1828            result.warnings.iter().any(|w| w.contains("no atoms")),
1829            "empty model should warn about missing atoms"
1830        );
1831    }
1832}